You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/09 06:06:49 UTC

[1/2] systemml git commit: [SYSTEMML-2119] Support for seq operations in codegen (local/spark)

Repository: systemml
Updated Branches:
  refs/heads/master 1a37cfad4 -> d0b4373fa


http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
index 314e118..a3e1dfe 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
@@ -136,8 +136,8 @@ public class SpoofSPInstruction extends SPInstruction {
 			
 			if( _out.getDataType()==DataType.MATRIX ) {
 				//execute codegen block operation
-				out = in.mapPartitionsToPair(new CellwiseFunction(
-					_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
+				out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(),
+					_classBytes, bcVect2, bcMatrices, scalars, mcIn.getRowsPerBlock()), true);
 				
 				if( (op.getCellType()==CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock())
 					|| (op.getCellType()==CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
@@ -153,8 +153,8 @@ public class SpoofSPInstruction extends SPInstruction {
 				updateOutputMatrixCharacteristics(sec, op);
 			}
 			else { //SCALAR
-				out = in.mapPartitionsToPair(new CellwiseFunction(
-					_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
+				out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(),
+					_classBytes, bcVect2, bcMatrices, scalars, mcIn.getRowsPerBlock()), true);
 				MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
 				sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
 			}
@@ -163,11 +163,9 @@ public class SpoofSPInstruction extends SPInstruction {
 		{
 			SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
 			AggOp[] aggOps = op.getAggOps();
-			
-			MatrixBlock tmpMB = in.mapToPair(new MultiAggregateFunction(
-				_class.getName(), _classBytes,  bcVect2, bcMatrices, scalars))
+			MatrixBlock tmpMB = in.mapToPair(new MultiAggregateFunction(_class.getName(),
+				_classBytes, bcVect2, bcMatrices, scalars, mcIn.getRowsPerBlock()))
 				.values().fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps) );
-			
 			sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
 		}
 		else if(_class.getSuperclass() == SpoofOuterProduct.class) //OUTER
@@ -207,8 +205,8 @@ public class SpoofSPInstruction extends SPInstruction {
 			SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
 			long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2() :
 				op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
-			RowwiseFunction fmmc = new RowwiseFunction(_class.getName(),
-				_classBytes, bcVect2, bcMatrices, scalars, (int)mcIn.getCols(), (int)clen2);
+			RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes, bcVect2,
+				bcMatrices, scalars, mcIn.getRowsPerBlock(), (int)mcIn.getCols(), (int)clen2);
 			out = in.mapPartitionsToPair(fmmc, op.getRowType()==RowType.ROW_AGG
 					|| op.getRowType() == RowType.NO_AGG);
 			
@@ -422,12 +420,15 @@ public class SpoofSPInstruction extends SPInstruction {
 	{
 		private static final long serialVersionUID = -7926980450209760212L;
 
+		private final int _brlen;
 		private final int _clen;
 		private final int _clen2;
 		private SpoofRowwise _op = null;
 		
-		public RowwiseFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices, ArrayList<ScalarObject> scalars, int clen, int clen2) {
+		public RowwiseFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices,
+			ArrayList<ScalarObject> scalars, int brlen, int clen, int clen2) {
 			super(className, classBytes, bcInd, bcMatrices, scalars);
+			_brlen = brlen;
 			_clen = clen;
-			_clen2 = clen;
+			_clen2 = clen2; //use the clen2 argument, not clen
 		}
@@ -453,11 +454,12 @@ public class SpoofSPInstruction extends SPInstruction {
 				Tuple2<MatrixIndexes,MatrixBlock[]> e = arg.next();
 				MatrixIndexes ixIn = e._1();
 				MatrixBlock[] blkIn = e._2();
+				long rix = (ixIn.getRowIndex()-1) * _brlen; //0-based
 				
 				//prepare output and execute single-threaded operator
 				ArrayList<MatrixBlock> inputs = getAllMatrixInputs(ixIn, blkIn);
 				blkOut = aggIncr ? blkOut : new MatrixBlock();
-				blkOut = _op.execute(inputs, _scalars, blkOut, false, aggIncr);
+				blkOut = _op.execute(inputs, _scalars, blkOut, false, aggIncr, rix);
 				if( !aggIncr ) {
 					MatrixIndexes ixOut = new MatrixIndexes(ixIn.getRowIndex(),
 						_op.getRowType()!=RowType.NO_AGG ? 1 : ixIn.getColumnIndex());
@@ -482,10 +484,12 @@ public class SpoofSPInstruction extends SPInstruction {
 	{
 		private static final long serialVersionUID = -8209188316939435099L;
 		
-		private SpoofOperator _op = null;
+		private SpoofCellwise _op = null;
+		private final int _brlen;
 		
-		public CellwiseFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices, ArrayList<ScalarObject> scalars) {
+		public CellwiseFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices, ArrayList<ScalarObject> scalars, int brlen) {
 			super(className, classBytes, bcInd, bcMatrices, scalars);
+			_brlen = brlen;
 		}
 		
 		@Override
@@ -495,7 +499,7 @@ public class SpoofSPInstruction extends SPInstruction {
 			//lazy load of shipped class
 			if( _op == null ) {
 				Class<?> loadedClass = CodegenUtils.getClassSync(_className, _classBytes);
-				_op = (SpoofOperator) CodegenUtils.createInstance(loadedClass); 
+				_op = (SpoofCellwise) CodegenUtils.createInstance(loadedClass); 
 			}
 			
 			List<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<>();
@@ -507,19 +511,20 @@ public class SpoofSPInstruction extends SPInstruction {
 				MatrixIndexes ixOut = ixIn; 
 				MatrixBlock blkOut = new MatrixBlock();
 				ArrayList<MatrixBlock> inputs = getAllMatrixInputs(ixIn, blkIn);
+				long rix = (ixIn.getRowIndex()-1) * _brlen; //0-based
 				
 				//execute core operation
-				if(((SpoofCellwise)_op).getCellType()==CellType.FULL_AGG) {
-					ScalarObject obj = _op.execute(inputs, _scalars, 1);
+				if( _op.getCellType()==CellType.FULL_AGG ) {
+					ScalarObject obj = _op.execute(inputs, _scalars, 1, rix);
 					blkOut.reset(1, 1);
-					blkOut.quickSetValue(0, 0, obj.getDoubleValue());	
+					blkOut.quickSetValue(0, 0, obj.getDoubleValue());
 				}
 				else {
-					if(((SpoofCellwise)_op).getCellType()==CellType.ROW_AGG)
+					if( _op.getCellType()==CellType.ROW_AGG )
 						ixOut = new MatrixIndexes(ixOut.getRowIndex(), 1);
 					else if(((SpoofCellwise)_op).getCellType()==CellType.COL_AGG)
 						ixOut = new MatrixIndexes(1, ixOut.getColumnIndex());
-					blkOut = _op.execute(inputs, _scalars, blkOut);
+					blkOut = _op.execute(inputs, _scalars, blkOut, 1, rix);
 				}
 				ret.add(new Tuple2<>(ixOut, blkOut));
 			}
@@ -532,10 +537,12 @@ public class SpoofSPInstruction extends SPInstruction {
 	{
 		private static final long serialVersionUID = -5224519291577332734L;
 		
-		private SpoofOperator _op = null;
+		private SpoofMultiAggregate _op = null;
+		private final int _brlen;
 		
-		public MultiAggregateFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices, ArrayList<ScalarObject> scalars) {
+		public MultiAggregateFunction(String className, byte[] classBytes, boolean[] bcInd, ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices, ArrayList<ScalarObject> scalars, int brlen) {
 			super(className, classBytes, bcInd, bcMatrices, scalars);
+			_brlen = brlen;
 		}
 		
 		@Override
@@ -545,13 +552,14 @@ public class SpoofSPInstruction extends SPInstruction {
 			//lazy load of shipped class
 			if( _op == null ) {
 				Class<?> loadedClass = CodegenUtils.getClassSync(_className, _classBytes);
-				_op = (SpoofOperator) CodegenUtils.createInstance(loadedClass); 
+				_op = (SpoofMultiAggregate) CodegenUtils.createInstance(loadedClass); 
 			}
-				
+			
 			//execute core operation
 			ArrayList<MatrixBlock> inputs = getAllMatrixInputs(arg._1(), arg._2());
 			MatrixBlock blkOut = new MatrixBlock();
-			blkOut = _op.execute(inputs, _scalars, blkOut);
+			long rix = (arg._1().getRowIndex()-1) * _brlen; //0-based
+			blkOut = _op.execute(inputs, _scalars, blkOut, 1, rix);
 			
 			return new Tuple2<>(arg._1(), blkOut);
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
index f9b4a4a..76469de 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
@@ -57,6 +57,7 @@ public class CellwiseTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME19 = TEST_NAME+19; //sum(ifelse(true,Y,Z))+sum(ifelse(false,Y,Z))
 	private static final String TEST_NAME20 = TEST_NAME+20; //bitwAnd() operation
 	private static final String TEST_NAME21 = TEST_NAME+21; //relu operation, (X>0)*dout
+	private static final String TEST_NAME22 = TEST_NAME+22; //X * seq(1,N) + seq(N,1)
 	
 	private static final String TEST_DIR = "functions/codegen/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + CellwiseTmplTest.class.getSimpleName() + "/";
@@ -69,7 +70,7 @@ public class CellwiseTmplTest extends AutomatedTestBase
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for( int i=1; i<=21; i++ ) {
+		for( int i=1; i<=22; i++ ) {
 			addTestConfiguration( TEST_NAME+i, new TestConfiguration(
 					TEST_CLASS_DIR, TEST_NAME+i, new String[] {String.valueOf(i)}) );
 		}
@@ -366,6 +367,21 @@ public class CellwiseTmplTest extends AutomatedTestBase
 	public void testCodegenCellwiseRewrite21_sp() {
 		testCodegenIntegration( TEST_NAME21, true, ExecType.SPARK );
 	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite22() {
+		testCodegenIntegration( TEST_NAME22, true, ExecType.CP );
+	}
+
+	@Test
+	public void testCodegenCellwise22() {
+		testCodegenIntegration( TEST_NAME22, false, ExecType.CP );
+	}
+
+	@Test
+	public void testCodegenCellwiseRewrite22_sp() {
+		testCodegenIntegration( TEST_NAME22, true, ExecType.SPARK );
+	}
 
 	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
 	{
@@ -433,6 +449,8 @@ public class CellwiseTmplTest extends AutomatedTestBase
 				Assert.assertTrue(!heavyHittersContainsSubString("uack+"));
 			else if( testname.equals(TEST_NAME17) )
 				Assert.assertTrue(!heavyHittersContainsSubString("xor"));
+			else if( testname.equals(TEST_NAME22) )
+				Assert.assertTrue(!heavyHittersContainsSubString("seq"));
 		}
 		finally {
 			rtplatform = platformOld;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index 6cb853d..e07c543 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -77,6 +77,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME38 = TEST_NAME+"38"; //sigmoid(X/rowSums)
 	private static final String TEST_NAME39 = TEST_NAME+"39"; //BitwAnd operation
 	private static final String TEST_NAME40 = TEST_NAME+"40"; //relu operation -> (X>0)* dout
+	private static final String TEST_NAME41 = TEST_NAME+"41"; //X*rowSums(X/seq(1,N)+seq(N,1))
 	
 	private static final String TEST_DIR = "functions/codegen/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + RowAggTmplTest.class.getSimpleName() + "/";
@@ -88,7 +89,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for(int i=1; i<=40; i++)
+		for(int i=1; i<=41; i++)
 			addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) );
 	}
 	
@@ -691,6 +692,21 @@ public class RowAggTmplTest extends AutomatedTestBase
 	public void testCodegenRowAgg40SP() {
 		testCodegenIntegration( TEST_NAME40, false, ExecType.SPARK );
 	}
+	
+	@Test
+	public void testCodegenRowAggRewrite41CP() {
+		testCodegenIntegration( TEST_NAME41, true, ExecType.CP );
+	}
+
+	@Test
+	public void testCodegenRowAgg41CP() {
+		testCodegenIntegration( TEST_NAME41, false, ExecType.CP );
+	}
+
+	@Test
+	public void testCodegenRowAgg41SP() {
+		testCodegenIntegration( TEST_NAME41, false, ExecType.SPARK );
+	}
 
 	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
 	{
@@ -748,6 +764,8 @@ public class RowAggTmplTest extends AutomatedTestBase
 					&& !heavyHittersContainsSubString("cbind"));
 			if( testname.equals(TEST_NAME36) )
 				Assert.assertTrue(!heavyHittersContainsSubString("xor"));
+			if( testname.equals(TEST_NAME41) )
+				Assert.assertTrue(!heavyHittersContainsSubString("seq"));
 		}
 		finally {
 			rtplatform = platformOld;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/scripts/functions/codegen/cellwisetmpl22.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl22.R b/src/test/scripts/functions/codegen/cellwisetmpl22.R
new file mode 100644
index 0000000..09c7fe5
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl22.R
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = matrix(7, 500, 2);
+t1 = as.matrix(seq(1,nrow(X))) %*% matrix(1,1,2);
+t2 = as.matrix(seq(nrow(X),1)) %*% matrix(1,1,2);
+R = X * t1 + t2;
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/scripts/functions/codegen/cellwisetmpl22.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl22.dml b/src/test/scripts/functions/codegen/cellwisetmpl22.dml
new file mode 100644
index 0000000..1132619
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl22.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(7, 500, 2);
+
+while(FALSE){}
+
+R = X * seq(1,nrow(X)) + seq(nrow(X),1);
+
+write(R, $1)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/scripts/functions/codegen/rowAggPattern41.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern41.R b/src/test/scripts/functions/codegen/rowAggPattern41.R
new file mode 100644
index 0000000..947c446
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern41.R
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = matrix(7, 500, 2);
+t1 = as.matrix(seq(1,nrow(X))) %*% matrix(1,1,2);
+t2 = as.matrix(seq(nrow(X),1))%*% matrix(1,1,2); 
+R = X * (rowSums(X/t1 + t2)%*%matrix(1,1,2));
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/test/scripts/functions/codegen/rowAggPattern41.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern41.dml b/src/test/scripts/functions/codegen/rowAggPattern41.dml
new file mode 100644
index 0000000..60b8a47
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern41.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(7, 500, 2);
+
+while(FALSE){}
+
+R = X * rowSums(X/seq(1,nrow(X)) + seq(nrow(X),1));
+
+write(R, $1)


[2/2] systemml git commit: [SYSTEMML-2119] Support for seq operations in codegen (local/spark)

Posted by mb...@apache.org.
[SYSTEMML-2119] Support for seq operations in codegen (local/spark)

This patch extends the code generation framework to support sequence
operators, in cell, magg, and row templates. Along with the typical
extensions of candidate exploration, templates, and minor optimizer
extensions, this also required a runtime extension for distributed
operations to feed the global row index into generated operators.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d0b4373f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d0b4373f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d0b4373f

Branch: refs/heads/master
Commit: d0b4373fa13951d159e0d2443b703f0d1a93bfc6
Parents: 1a37cfa
Author: Matthias Boehm <mb...@gmail.com>
Authored: Tue May 8 22:22:59 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Tue May 8 23:07:20 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/DataGenOp.java   |   4 +
 .../sysml/hops/codegen/SpoofCompiler.java       |   6 +-
 .../sysml/hops/codegen/cplan/CNodeBinary.java   |  79 ++++---
 .../sysml/hops/codegen/cplan/CNodeCell.java     |   2 +-
 .../sysml/hops/codegen/cplan/CNodeMultiAgg.java |   2 +-
 .../sysml/hops/codegen/cplan/CNodeRow.java      |   4 +-
 .../opt/PlanSelectionFuseCostBasedV2.java       |   4 +-
 .../hops/codegen/template/TemplateCell.java     |  50 ++--
 .../hops/codegen/template/TemplateRow.java      |  42 ++--
 .../hops/codegen/template/TemplateUtils.java    |  13 ++
 .../sysml/hops/rewrite/HopRewriteUtils.java     |  24 +-
 .../sysml/runtime/codegen/SpoofCellwise.java    | 234 ++++++++++---------
 .../runtime/codegen/SpoofMultiAggregate.java    |  46 ++--
 .../sysml/runtime/codegen/SpoofRowwise.java     |  54 +++--
 .../instructions/spark/SpoofSPInstruction.java  |  58 +++--
 .../functions/codegen/CellwiseTmplTest.java     |  20 +-
 .../functions/codegen/RowAggTmplTest.java       |  20 +-
 .../scripts/functions/codegen/cellwisetmpl22.R  |  31 +++
 .../functions/codegen/cellwisetmpl22.dml        |  28 +++
 .../scripts/functions/codegen/rowAggPattern41.R |  31 +++
 .../functions/codegen/rowAggPattern41.dml       |  28 +++
 21 files changed, 518 insertions(+), 262 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index 0c6b586..3101721 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -366,6 +366,10 @@ public class DataGenOp extends Hop implements MultiThreadedHop
 		return _paramIndexMap;
 	}
 	
+	public Hop getParam(String key) {
+		return getInput().get(getParamIndex(key));
+	}
+	
 	public int getParamIndex(String key) {
 		return _paramIndexMap.get(key);
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 932b0be..6ef8dc4 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -665,8 +665,10 @@ public class SpoofCompiler
 			CNodeTpl tpl = e.getValue().getValue();
 			Hop[] inHops = e.getValue().getKey();
 			
-			//remove invalid plans with null inputs 
-			if( Arrays.stream(inHops).anyMatch(h -> (h==null)) )
+			//remove invalid plans with null, empty, or all scalar inputs 
+			if( inHops == null || inHops.length == 0
+				|| Arrays.stream(inHops).anyMatch(h -> (h==null))
+				|| Arrays.stream(inHops).allMatch(h -> h.isScalar()))
 				continue;
 			
 			//perform simplifications and cse rewrites

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index 4aee712..80a7f83 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -52,6 +52,7 @@ public class CNodeBinary extends CNode
 		LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL,NOTEQUAL,
 		MIN, MAX, AND, OR, XOR, LOG, LOG_NZ, POW,
 		BITWAND,
+		SEQ_RIX,
 		MINUS1_MULT, MINUS_NZ;
 
 		public static boolean contains(String value) {
@@ -214,7 +215,9 @@ public class CNodeBinary extends CNode
 					return "    double %TMP% = ( (%IN1% != 0) != (%IN2% != 0) ) ? 1 : 0;\n";
 				case BITWAND:
 					return "    double %TMP% = LibSpoofPrimitives.bwAnd(%IN1%, %IN2%);\n";
-
+				case SEQ_RIX:
+					return "    double %TMP% = %IN1% + grix * %IN2%;\n"; //0-based global rix
+				
 				default: 
 					throw new RuntimeException("Invalid binary type: "+this.toString());
 			}
@@ -409,6 +412,7 @@ public class CNodeBinary extends CNode
 			case AND:                      return "b(&)";
 			case XOR:                      return "b(xor)";
 			case BITWAND:                  return "b(bitwAnd)";
+			case SEQ_RIX:                  return "b(seqr)";
 			case MINUS1_MULT:              return "b(1-*)";
 			case MINUS_NZ:                 return "b(-nz)";
 			default: return "b("+_type.name().toLowerCase()+")";
@@ -420,7 +424,7 @@ public class CNodeBinary extends CNode
 	{
 		switch(_type) {
 			//VECT
-			case VECT_MULT_ADD: 
+			case VECT_MULT_ADD:
 			case VECT_DIV_ADD:
 			case VECT_MINUS_ADD:
 			case VECT_PLUS_ADD:
@@ -428,10 +432,10 @@ public class CNodeBinary extends CNode
 			case VECT_MIN_ADD:
 			case VECT_MAX_ADD:
 			case VECT_EQUAL_ADD: 
-			case VECT_NOTEQUAL_ADD: 
-			case VECT_LESS_ADD: 
-			case VECT_LESSEQUAL_ADD: 
-			case VECT_GREATER_ADD: 
+			case VECT_NOTEQUAL_ADD:
+			case VECT_LESS_ADD:
+			case VECT_LESSEQUAL_ADD:
+			case VECT_GREATER_ADD:
 			case VECT_GREATEREQUAL_ADD:
 			case VECT_CBIND_ADD:
 			case VECT_XOR_ADD:
@@ -452,8 +456,8 @@ public class CNodeBinary extends CNode
 				_cols = _inputs.get(1)._cols;
 				_dataType = DataType.MATRIX;
 				break;
-				
-			case VECT_DIV_SCALAR: 	
+			
+			case VECT_DIV_SCALAR:
 			case VECT_MULT_SCALAR:
 			case VECT_MINUS_SCALAR:
 			case VECT_PLUS_SCALAR:
@@ -463,13 +467,13 @@ public class CNodeBinary extends CNode
 			case VECT_MIN_SCALAR:
 			case VECT_MAX_SCALAR:
 			case VECT_EQUAL_SCALAR: 
-			case VECT_NOTEQUAL_SCALAR: 
-			case VECT_LESS_SCALAR: 
-			case VECT_LESSEQUAL_SCALAR: 
+			case VECT_NOTEQUAL_SCALAR:
+			case VECT_LESS_SCALAR:
+			case VECT_LESSEQUAL_SCALAR:
 			case VECT_GREATER_SCALAR: 
 			case VECT_GREATEREQUAL_SCALAR:
 			
-			case VECT_DIV: 	
+			case VECT_DIV:
 			case VECT_MULT:
 			case VECT_MINUS:
 			case VECT_PLUS:
@@ -477,12 +481,12 @@ public class CNodeBinary extends CNode
 			case VECT_BITWAND:
 			case VECT_MIN:
 			case VECT_MAX:
-			case VECT_EQUAL: 
-			case VECT_NOTEQUAL: 
-			case VECT_LESS: 
-			case VECT_LESSEQUAL: 
-			case VECT_GREATER: 
-			case VECT_GREATEREQUAL:	
+			case VECT_EQUAL:
+			case VECT_NOTEQUAL:
+			case VECT_LESS:
+			case VECT_LESSEQUAL:
+			case VECT_GREATER:
+			case VECT_GREATEREQUAL:
 				boolean scalarVector = (_inputs.get(0).getDataType()==DataType.SCALAR);
 				_rows = _inputs.get(scalarVector ? 1 : 0)._rows;
 				_cols = _inputs.get(scalarVector ? 1 : 0)._cols;
@@ -494,39 +498,40 @@ public class CNodeBinary extends CNode
 				_cols = _inputs.get(1)._cols;
 				_dataType = DataType.MATRIX;
 				break;
-				
-			case DOT_PRODUCT: 
+			
+			case DOT_PRODUCT:
 			
 			//SCALAR Arithmetic
-			case MULT: 
-			case DIV: 
-			case PLUS: 
-			case MINUS: 
+			case MULT:
+			case DIV:
+			case PLUS:
+			case MINUS:
 			case MINUS1_MULT:
 			case MINUS_NZ:
-			case MODULUS: 
-			case INTDIV: 	
+			case MODULUS:
+			case INTDIV:
 			//SCALAR Comparison
-			case LESS: 
-			case LESSEQUAL: 
+			case LESS:
+			case LESSEQUAL:
 			case GREATER: 
-			case GREATEREQUAL: 
-			case EQUAL: 
-			case NOTEQUAL: 
+			case GREATEREQUAL:
+			case EQUAL:
+			case NOTEQUAL:
 			//SCALAR LOGIC
-			case MIN: 
-			case MAX: 
-			case AND: 
+			case MIN:
+			case MAX:
+			case AND:
 			case OR:
 			case XOR:
 			case BITWAND:
-			case LOG: 
+			case LOG:
 			case LOG_NZ:
-			case POW: 
+			case POW:
+			case SEQ_RIX:
 				_rows = 0;
 				_cols = 0;
 				_dataType= DataType.SCALAR;
-				break;	
+				break;
 		}
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
index 3b30124..e97a00a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
@@ -41,7 +41,7 @@ public class CNodeCell extends CNodeTpl
 			+ "  public %TMP%() {\n"
 			+ "    super(CellType.%TYPE%, %SPARSE_SAFE%, %AGG_OP%);\n"
 			+ "  }\n"
-			+ "  protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, int rix, int cix) { \n"
+			+ "  protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, long grix, int rix, int cix) { \n"
 			+ "%BODY_dense%"
 			+ "    return %OUT%;\n"
 			+ "  }\n"

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
index fd39c7c..b0bd03a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
@@ -44,7 +44,7 @@ public class CNodeMultiAgg extends CNodeTpl
 			+ "    super(%SPARSE_SAFE%, %AGG_OP%);\n"
 			+ "  }\n"
 			+ "  protected void genexec(double a, SideInput[] b, double[] scalars, double[] c, "
-					+ "int m, int n, int rix, int cix) { \n"
+					+ "int m, int n, long grix, int rix, int cix) { \n"
 			+ "%BODY_dense%"
 			+ "  }\n"
 			+ "}\n";

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
index e6aa53a..e21ff2b 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
@@ -41,10 +41,10 @@ public class CNodeRow extends CNodeTpl
 			+ "  public %TMP%() {\n"
 			+ "    super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, %VECT_MEM%);\n"
 			+ "  }\n"
-			+ "  protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { \n"
+			+ "  protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int len, long grix, int rix) { \n"
 			+ "%BODY_dense%"
 			+ "  }\n"
-			+ "  protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, int rix) { \n"
+			+ "  protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, long grix, int rix) { \n"
 			+ "%BODY_sparse%"
 			+ "  }\n"
 			+ "}\n";

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index 1e5bcf3..3db4ce8 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -44,6 +44,7 @@ import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.DataOpTypes;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp2;
@@ -718,7 +719,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 			|| (hop instanceof AggBinaryOp && (inRow || !hop.dimsKnown()
 				|| (hop.getDim1()!=1 && hop.getDim2()!=1)))
 			|| (HopRewriteUtils.isTransposeOperation(hop)
-				&& (hop.getDim1()!=1 && hop.getDim2()!=1))
+				&& (hop.getDim1()!=1 && hop.getDim2()!=1)
+				&& !HopRewriteUtils.isDataGenOp(hop.getInput().get(0),DataGenMethod.SEQ))
 			|| (hop instanceof AggUnaryOp && inRow);
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
index cfac2d7..8eaffaf 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
@@ -29,10 +29,12 @@ import java.util.List;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
+import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.DataOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOp3;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
@@ -52,6 +54,7 @@ import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
 import org.apache.sysml.hops.codegen.cplan.CNodeTernary;
 import org.apache.sysml.hops.codegen.cplan.CNodeTernary.TernaryType;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
+import org.apache.sysml.parser.Statement;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.matrix.data.Pair;
 
@@ -77,7 +80,9 @@ public class TemplateCell extends TemplateBase
 		return hop.dimsKnown() && isValidOperation(hop)
 				&& !(hop.getDim1()==1 && hop.getDim2()==1) 
 			|| (hop instanceof IndexingOp && hop.getInput().get(0).getDim2() >= 0
-				&& (((IndexingOp)hop).isColLowerEqualsUpper() || hop.getDim2()==1));
+				&& (((IndexingOp)hop).isColLowerEqualsUpper() || hop.getDim2()==1))
+			|| (HopRewriteUtils.isDataGenOpWithLiteralInputs(hop, DataGenMethod.SEQ)
+				&& HopRewriteUtils.hasOnlyUnaryBinaryParents(hop, true));
 	}
 
 	@Override
@@ -96,7 +101,9 @@ public class TemplateCell extends TemplateBase
 		//merge of other cell tpl possible
 		return (!isClosed() && (isValidOperation(hop) 
 			|| (hop instanceof AggBinaryOp && hop.getInput().indexOf(input)==0 
-				&& HopRewriteUtils.isTransposeOperation(input))));
+				&& HopRewriteUtils.isTransposeOperation(input))))
+			|| (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, DataGenMethod.SEQ)
+				&& HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false));
 	}
 
 	@Override
@@ -177,16 +184,14 @@ public class TemplateCell extends TemplateBase
 		
 		//construct cnode for current hop
 		CNode out = null;
-		if(hop instanceof UnaryOp)
-		{
+		if(hop instanceof UnaryOp) {
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0));
 			
 			String primitiveOpName = ((UnaryOp)hop).getOp().name();
 			out = new CNodeUnary(cdata1, UnaryType.valueOf(primitiveOpName));
 		}
-		else if(hop instanceof BinaryOp)
-		{
+		else if(hop instanceof BinaryOp) {
 			BinaryOp bop = (BinaryOp) hop;
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID());
@@ -200,8 +205,7 @@ public class TemplateCell extends TemplateBase
 			out = new CNodeBinary(cdata1, cdata2, 
 				BinType.valueOf(primitiveOpName));
 		}
-		else if(hop instanceof TernaryOp) 
-		{
+		else if(hop instanceof TernaryOp) {
 			TernaryOp top = (TernaryOp) hop;
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID());
@@ -216,27 +220,34 @@ public class TemplateCell extends TemplateBase
 			out = new CNodeTernary(cdata1, cdata2, cdata3, 
 				TernaryType.valueOf(top.getOp().name()));
 		}
-		else if( hop instanceof ParameterizedBuiltinOp ) 
-		{
+		else if( hop instanceof ParameterizedBuiltinOp ) {
 			CNode cdata1 = tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID());
 			cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0));
 			
 			CNode cdata2 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID());
 			CNode cdata3 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID());
 			TernaryType ttype = (cdata2.isLiteral() && cdata2.getVarname().equals("Double.NaN")) ? 
-					TernaryType.REPLACE_NAN : TernaryType.REPLACE;
+				TernaryType.REPLACE_NAN : TernaryType.REPLACE;
 			out = new CNodeTernary(cdata1, cdata2, cdata3, ttype);
 		}
-		else if( hop instanceof IndexingOp ) 
-		{
+		else if( hop instanceof IndexingOp ) {
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			out = new CNodeTernary(cdata1, 
-					TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), 
-					TemplateUtils.createCNodeData(hop.getInput().get(4), true),
-					TernaryType.LOOKUP_RC1);
+				TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), 
+				TemplateUtils.createCNodeData(hop.getInput().get(4), true),
+				TernaryType.LOOKUP_RC1);
 		}
-		else if( HopRewriteUtils.isTransposeOperation(hop) ) 
-		{
+		else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) {
+			CNodeData from = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID()));
+			CNodeData to = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID()));
+			CNodeData incr = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID()));
+			if( Double.parseDouble(from.getVarname()) > Double.parseDouble(to.getVarname())
+				&& Double.parseDouble(incr.getVarname()) > 0 ) {
+				incr = TemplateUtils.createCNodeData(new LiteralOp("-"+incr.getVarname()), true);
+			}
+			out = new CNodeBinary(from, incr, BinType.SEQ_RIX);
+		}
+		else if( HopRewriteUtils.isTransposeOperation(hop) ) {
 			out = TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), 
 				hop, tmp, compileLiterals);
 			//correct indexing types of existing lookups
@@ -246,8 +257,7 @@ public class TemplateCell extends TemplateBase
 			if( out instanceof CNodeData && !inHops.contains(hop.getInput().get(0)) )
 				inHops.add(hop.getInput().get(0));
 		}
-		else if( hop instanceof AggUnaryOp )
-		{
+		else if( hop instanceof AggUnaryOp ) {
 			//aggregation handled in template implementation (note: we do not compile 
 			//^2 of SUM_SQ into the operator to simplify the detection of single operators)
 			out = tmp.get(hop.getInput().get(0).getHopID());

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 1e996d9..fb153de 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
+import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LiteralOp;
@@ -48,10 +49,13 @@ import org.apache.sysml.hops.codegen.cplan.CNodeUnary.UnaryType;
 import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp1;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOpN;
+import org.apache.sysml.hops.HopsException;
+import org.apache.sysml.parser.Statement;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
@@ -134,6 +138,8 @@ public class TemplateRow extends TemplateBase
 				&& hop.getDim1() > 1 && input.getDim1()>1)
 			|| (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && hop.getInput().get(0).isMatrix() && hop.dimsKnown())
 			|| (HopRewriteUtils.isNary(hop, OpOpN.CBIND) && hop.getInput().get(0).isMatrix() && hop.dimsKnown())
+			|| (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, DataGenMethod.SEQ)
+				&& HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false))
 			|| (hop instanceof AggBinaryOp
 				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
 			 	&& (input.getDim2()==1 || (input==hop.getInput().get(1) 
@@ -328,15 +334,23 @@ public class TemplateRow extends TemplateBase
 				}
 			}
 		}
-		else if( HopRewriteUtils.isTransposeOperation(hop) ) 
-		{
+		else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) {
+			CNodeData from = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID()));
+			CNodeData to = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID()));
+			CNodeData incr = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID()));
+			if( Double.parseDouble(from.getVarname()) > Double.parseDouble(to.getVarname())
+				&& Double.parseDouble(incr.getVarname()) > 0 ) {
+				incr = TemplateUtils.createCNodeData(new LiteralOp("-"+incr.getVarname()), true);
+			}
+			out = new CNodeBinary(from, incr, BinType.SEQ_RIX);
+		}
+		else if( HopRewriteUtils.isTransposeOperation(hop) ) {
 			out = TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), 
 				hop, tmp, compileLiterals);
 			if( out instanceof CNodeData && !inHops.contains(hop.getInput().get(0)) )
 				inHops.add(hop.getInput().get(0));
 		}
-		else if(hop instanceof UnaryOp)
-		{
+		else if(hop instanceof UnaryOp) {
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			
 			// if one input is a matrix then we need to do vector by scalar operations
@@ -360,8 +374,7 @@ public class TemplateRow extends TemplateBase
 				out = new CNodeUnary(cdata1, UnaryType.valueOf(primitiveOpName));
 			}
 		}
-		else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) 
-		{
+		else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) {
 			//special case for cbind with zeros
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			CNode cdata2 = null;
@@ -425,8 +438,7 @@ public class TemplateRow extends TemplateBase
 				out = new CNodeBinary(cdata1, cdata2, BinType.valueOf(primitiveOpName));
 			}
 		}
-		else if(hop instanceof TernaryOp) 
-		{
+		else if(hop instanceof TernaryOp) {
 			TernaryOp top = (TernaryOp) hop;
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID());
@@ -440,8 +452,7 @@ public class TemplateRow extends TemplateBase
 			out = new CNodeTernary(cdata1, cdata2, cdata3, 
 				TernaryType.valueOf(top.getOp().toString()));
 		}
-		else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) 
-		{
+		else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) {
 			CNode[] inputs = new CNode[hop.getInput().size()];
 			for( int i=0; i<hop.getInput().size(); i++ ) {
 				Hop c = hop.getInput().get(i);
@@ -454,19 +465,16 @@ public class TemplateRow extends TemplateBase
 			}
 			out = new CNodeNary(inputs, NaryType.VECT_CBIND);
 		}
-		else if( hop instanceof ParameterizedBuiltinOp )
-		{
+		else if( hop instanceof ParameterizedBuiltinOp ) {
 			CNode cdata1 = tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID());
 			cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0));
-			
 			CNode cdata2 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID());
 			CNode cdata3 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID());
 			TernaryType ttype = (cdata2.isLiteral() && cdata2.getVarname().equals("Double.NaN")) ?
-					TernaryType.REPLACE_NAN : TernaryType.REPLACE;
+				TernaryType.REPLACE_NAN : TernaryType.REPLACE;
 			out = new CNodeTernary(cdata1, cdata2, cdata3, ttype);
 		}
-		else if( hop instanceof IndexingOp ) 
-		{
+		else if( hop instanceof IndexingOp ) {
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			out = new CNodeTernary(cdata1, 
 				TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true),
@@ -475,7 +483,7 @@ public class TemplateRow extends TemplateBase
 		}
 		
 		if( out == null ) {
-			throw new RuntimeException(hop.getHopID()+" "+hop.getOpString());
+			throw new HopsException(hop.getHopID()+" "+hop.getOpString());
 		}
 		
 		if( out.getDataType().isMatrix() ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index b833876..4a61678 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -239,6 +239,19 @@ public class TemplateUtils
 		throw new RuntimeException("Undefined outer product type for hop "+out.getHopID());
 	}
 	
+	public static CNodeData getLiteral(CNode node) { // returns the node itself when it is already a literal data node
+		return ((CNodeData) node).isLiteral() ? (CNodeData)node : // NOTE(review): cast is unchecked, so a node that is not a CNodeData throws ClassCastException here
+			createCNodeData(new LiteralOp(node.getVarname()), true); // otherwise builds a data node from a LiteralOp holding the node's variable name
+	}
+	
+	public static boolean isLiteral(CNode node) { // true only for CNodeData instances that report isLiteral()
+		return node instanceof CNodeData && ((CNodeData)node).isLiteral(); // instanceof guard makes the cast safe and yields false for null
+	}
+	
+	public static boolean isLiteral(CNode node, String val) { // literal check plus a match of the node's variable name against val
+		return isLiteral(node) && ((CNodeData)node).getVarname().equals(val); // compared via String.equals, i.e. exact text match
+	}
+	
 	public static boolean isLookup(CNode node, boolean includeRC1) {
 		return isUnary(node, UnaryType.LOOKUP_C, UnaryType.LOOKUP_RC)
 			|| (includeRC1 && isUnary(node, UnaryType.LOOKUP_R))

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 427b075..3a0c3f2 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -485,6 +485,13 @@ public class HopRewriteUtils
 			&& ArrayUtils.contains(ops, ((DataGenOp)hop).getOp()));
 	}
 	
+	public static boolean isDataGenOpWithLiteralInputs(Hop hop, DataGenMethod... ops) { // isDataGenOp(hop, ops) and every input of hop is a LiteralOp
+		boolean ret = isDataGenOp(hop, ops);
+		for( Hop c : hop.getInput() ) // NOTE(review): hop.getInput() is dereferenced unconditionally, so hop is presumably never null here
+			ret &= c instanceof LiteralOp; // non-short-circuit: all inputs are visited even once ret is false
+		return ret;
+	}
+	
 	public static boolean isDataGenOpWithConstantValue(Hop hop) {
 		return hop instanceof DataGenOp
 			&& ((DataGenOp)hop).getOp()==DataGenMethod.RAND
@@ -1167,13 +1174,10 @@ public class HopRewriteUtils
 			|| (isColumnRightIndexing(input) && size.getInput().get(0)==input.getInput().get(0))));
 	}
 	
-	public static boolean hasOnlyWriteParents( Hop hop, boolean inclTransient, boolean inclPersistent )
-	{
+	public static boolean hasOnlyWriteParents( Hop hop, boolean inclTransient, boolean inclPersistent ) {
 		boolean ret = true;
-		
 		ArrayList<Hop> parents = hop.getParent();
-		for( Hop p : parents )
-		{
+		for( Hop p : parents ) {
 			if( inclTransient && inclPersistent )
 				ret &= ( p instanceof DataOp && (((DataOp)p).getDataOpType()==DataOpTypes.TRANSIENTWRITE
 				|| ((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE));
@@ -1182,8 +1186,14 @@ public class HopRewriteUtils
 			else if(inclPersistent)
 				ret &= ( p instanceof DataOp && ((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE);
 		}
-			
-				
+		return ret;
+	}
+	
+	public static boolean hasOnlyUnaryBinaryParents(Hop hop, boolean disallowLhs) { // true if every parent of hop is a UnaryOp or an accepted BinaryOp
+		boolean ret = true; // remains true when hop has no parents
+		for( Hop p : hop.getParent() )
+			ret &= (p instanceof UnaryOp || (p instanceof BinaryOp 
+				&& (!disallowLhs || p.getInput().get(1)==hop))); // with disallowLhs, hop must be the second input (index 1) of the binary parent
 		return ret;
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 20e6a95..5ec18f5 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -111,6 +111,10 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	
 	@Override
 	public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k) {
+		return execute(inputs, scalarObjects, k, 0); // delegates to the overload taking a row offset, using offset 0
+	}
+	
+	public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k, long rix) {
 		//sanity check
 		if( inputs==null || inputs.size() < 1  )
 			throw new RuntimeException("Invalid input arguments.");
@@ -136,11 +140,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		if( k <= 1 ) //SINGLE-THREADED
 		{
 			if( inputs.get(0) instanceof CompressedMatrixBlock )
-				ret = executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 0, m);
+				ret = executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 0, m, rix);
 			else if( !inputs.get(0).isInSparseFormat() )
-				ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m);
+				ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix);
 			else
-				ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m);
+				ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix);
 		}
 		else  //MULTI-THREADED
 		{
@@ -187,11 +191,15 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 
 	@Override
 	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-		return execute(inputs, scalarObjects, out, 1);
+		return execute(inputs, scalarObjects, out, 1, 0); // k=1 and row offset rix=0
 	}
 	
 	@Override
 	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
+		return execute(inputs, scalarObjects, out, k, 0); // delegates to the overload taking a row offset, using offset 0
+	}
+	
+	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) {
 		//sanity check
 		if( inputs==null || inputs.size() < 1 || out==null )
 			throw new RuntimeException("Invalid input arguments.");
@@ -228,11 +236,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		if( k <= 1 ) //SINGLE-THREADED
 		{
 			if( inputs.get(0) instanceof CompressedMatrixBlock )
-				lnnz = executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 0, m);
+				lnnz = executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 0, m, rix);
 			else if( !inputs.get(0).isInSparseFormat() )
-				lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
+				lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m, rix);
 			else
-				lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
+				lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m, rix);
 		}
 		else  //MULTI-THREADED
 		{
@@ -286,42 +294,42 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	//function dispatch
 	
 	private long executeDense(DenseBlock a, SideInput[] b, double[] scalars, 
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { // dispatches on _type and _aggOp; rix is forwarded unchanged to the selected kernel
 		DenseBlock c = out.getDenseBlock();
 		SideInput[] lb = createSparseSideInputs(b);
 		
 		if( _type == CellType.NO_AGG ) {
-			return executeDenseNoAgg(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
+			return executeDenseNoAgg(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); // no aggregation
 		}
 		else if( _type == CellType.ROW_AGG ) {
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeDenseRowAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeDenseRowAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); // row aggregation for SUM / SUM_SQ
 			else
-				return executeDenseRowAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeDenseRowAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); // row aggregation for all other aggregation ops
 		}
 		else if( _type == CellType.COL_AGG ) {
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeDenseColAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeDenseColAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); // column aggregation for SUM / SUM_SQ
 			else
-				return executeDenseColAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeDenseColAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); // column aggregation for all other aggregation ops
 		}
 		return -1;
 	}
 	
 	private double executeDenseAndAgg(DenseBlock a, SideInput[] b, double[] scalars, 
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix) // rix is forwarded unchanged to executeDenseAggSum or executeDenseAggMxx
 	{
 		SideInput[] lb = createSparseSideInputs(b);
 		
 		//numerically stable aggregation for sum/sum_sq
 		if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-			return executeDenseAggSum(a, lb, scalars, m, n, sparseSafe, rl, ru);
+			return executeDenseAggSum(a, lb, scalars, m, n, sparseSafe, rl, ru, rix); // SUM / SUM_SQ path
 		else
-			return executeDenseAggMxx(a, lb, scalars, m, n, sparseSafe, rl, ru);
+			return executeDenseAggMxx(a, lb, scalars, m, n, sparseSafe, rl, ru, rix); // every other aggregation op
 	}
 	
 	private long executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		if( sparseSafe && sblock == null )
 			return 0;
@@ -329,47 +337,47 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		SideInput[] lb = createSparseSideInputs(b);
 		if( _type == CellType.NO_AGG ) {
 			if( out.isInSparseFormat() )
-				return executeSparseNoAggSparse(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseNoAggSparse(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 			else
-				return executeSparseNoAggDense(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseNoAggDense(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 		}
 		else if( _type == CellType.ROW_AGG ) {
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeSparseRowAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseRowAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 			else
-				return executeSparseRowAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseRowAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 		}
 		else if( _type == CellType.COL_AGG ) {
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeSparseColAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseColAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 			else
-				return executeSparseColAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru);
+				return executeSparseColAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix);
 		}
 		
 		return -1;
 	}
 	
 	private double executeSparseAndAgg(SparseBlock sblock, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix) // rix is forwarded unchanged to executeSparseAggSum or executeSparseAggMxx
 	{
 		if( sparseSafe && sblock == null )
 			return 0;
 		
 		SideInput[] lb = createSparseSideInputs(b);
 		if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-			return executeSparseAggSum(sblock, lb, scalars, m, n, sparseSafe, rl, ru);
+			return executeSparseAggSum(sblock, lb, scalars, m, n, sparseSafe, rl, ru, rix); // SUM / SUM_SQ path
 		else
-			return executeSparseAggMxx(sblock, lb, scalars, m, n, sparseSafe, rl, ru);
+			return executeSparseAggMxx(sblock, lb, scalars, m, n, sparseSafe, rl, ru, rix); // every other aggregation op
 	}
 	
 	private long executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//NOTE: we don't create sparse side inputs w/ row-major cursors because 
 		//compressed data is access in a column-major order 
 		
 		if( _type == CellType.NO_AGG ) {
-			long lnnz = executeCompressedNoAgg(a, b, scalars, out, m, n, sparseSafe, rl, ru);
+			long lnnz = executeCompressedNoAgg(a, b, scalars, out, m, n, sparseSafe, rl, ru, rix);
 			if( out.isInSparseFormat() )
 				out.sortSparseRows(rl, ru);
 			return lnnz;
@@ -377,38 +385,38 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		else if( _type == CellType.ROW_AGG ) {
 			double[] c = out.getDenseBlockValues();
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeCompressedRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeCompressedRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix);
 			else
-				return executeCompressedRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeCompressedRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix);
 		}
 		else if( _type == CellType.COL_AGG ) {
 			double[] c = out.getDenseBlockValues();
 			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-				return executeCompressedColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeCompressedColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix);
 			else
-				return executeCompressedColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+				return executeCompressedColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix);
 		}
 		return -1;
 	}
 	
 	private double executeCompressedAndAgg(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix) // rix is forwarded unchanged to executeCompressedAggSum or executeCompressedAggMxx
 	{
 		//NOTE: we don't create sparse side inputs w/ row-major cursors because 
 		//compressed data is access in a column-major order 
 		
 		//numerically stable aggregation for sum/sum_sq
 		if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-			return executeCompressedAggSum(a, b, scalars, m, n, sparseSafe, rl, ru);
+			return executeCompressedAggSum(a, b, scalars, m, n, sparseSafe, rl, ru, rix); // SUM / SUM_SQ path; side inputs b are passed as received
 		else
-			return executeCompressedAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru);
+			return executeCompressedAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru, rix); // every other aggregation op
 	}
 	
 	/////////
 	//core operator skeletons for dense, sparse, and compressed
 
 	private long executeDenseNoAgg(DenseBlock a, SideInput[] b, double[] scalars,
-			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		long lnnz = 0;
 		if( a == null && !sparseSafe ) {
@@ -416,7 +424,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				double[] cvals = c.values(i);
 				int cix = c.pos(i);
 				for( int j=0; j<n; j++ )
-					lnnz += ((cvals[cix+j] = genexec(0, b, scalars, m, n, i, j))!=0) ? 1 : 0;
+					lnnz += ((cvals[cix+j] = genexec(0, b, scalars, m, n, rix+i, i, j))!=0) ? 1 : 0;
 			}
 		}
 		else if( a != null ) {
@@ -427,7 +435,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[ix+j];
 					if( aval != 0 || !sparseSafe)
-						lnnz += ((cvals[ix+j] = genexec(aval, b, scalars, m, n, i, j))!=0) ? 1 : 0;
+						lnnz += ((cvals[ix+j] = genexec(aval, b, scalars, m, n, rix+i, i, j))!=0) ? 1 : 0;
 				}
 			}
 		}
@@ -436,7 +444,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, double[] scalars,
-		DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+		DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//note: output always single block
 		double[] lc = c.valuesAt(0);
@@ -448,7 +456,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			for( int i=rl; i<ru; i++ ) {
 				kbuff.set(0, 0);
 				for( int j=0; j<n; j++ )
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 				lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
 			}
 		}
@@ -460,7 +468,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[aix+j];
 					if( aval != 0 || !sparseSafe)
-						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j));
 				}
 				lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
 			}
@@ -470,7 +478,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeDenseRowAggMxx(DenseBlock a, SideInput[] b, double[] scalars,
-			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double[] lc = c.valuesAt(0); //single block
 		
@@ -481,7 +489,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			for( int i=rl; i<ru; i++ ) {
 				double tmp = initialVal;
 				for( int j=0; j<n; j++ )
-					tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+					tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j));
 				lnnz += ((lc[i] = tmp)!=0) ? 1 : 0;
 			}
 		}
@@ -493,7 +501,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[aix + j];
 					if( aval != 0 || !sparseSafe)
-						tmp = vfun.execute(tmp, genexec(aval, b, scalars, m, n, i, j));
+						tmp = vfun.execute(tmp, genexec(aval, b, scalars, m, n, rix+i, i, j));
 				}
 				if( sparseSafe && UtilFunctions.containsZero(avals, aix, n) )
 					tmp = vfun.execute(tmp, 0);
@@ -504,7 +512,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeDenseColAggSum(DenseBlock a, SideInput[] b, double[] scalars,
-		DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+		DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double[] lc = c.valuesAt(0); //single block
 		
@@ -516,7 +524,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			for( int i=rl; i<ru; i++ )
 				for( int j=0; j<n; j++ ) {
 					kbuff.set(lc[j], corr[j]);
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 					lc[j] = kbuff._sum;
 					corr[j] = kbuff._correction;
 				}
@@ -529,7 +537,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					double aval = avals[aix + j];
 					if( aval != 0 || !sparseSafe ) {
 						kbuff.set(lc[j], corr[j]);
-						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j));
 						lc[j] = kbuff._sum;
 						corr[j] = kbuff._correction;
 					}
@@ -541,7 +549,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeDenseColAggMxx(DenseBlock a, SideInput[] b, double[] scalars,
-			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+			DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double[] lc = c.valuesAt(0); //single block
 		
@@ -552,7 +560,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		if( a == null && !sparseSafe ) { //empty
 			for( int i=rl; i<ru; i++ )
 				for( int j=0; j<n; j++ )
-					lc[j] = vfun.execute(lc[j], genexec(0, b, scalars, m, n, i, j));
+					lc[j] = vfun.execute(lc[j], genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		else if( a != null ) { //general case
 			int[] counts = new int[n];
@@ -562,7 +570,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[aix + j];
 					if( aval != 0 || !sparseSafe ) {
-						lc[j] = vfun.execute(lc[j], genexec(aval, b, scalars, m, n, i, j));
+						lc[j] = vfun.execute(lc[j], genexec(aval, b, scalars, m, n, rix+i, i, j));
 						counts[j] ++;
 					}
 				}
@@ -576,7 +584,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private double executeDenseAggSum(DenseBlock a, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -584,7 +592,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		if( a == null && !sparseSafe ) {
 			for( int i=rl; i<ru; i++ )
 				for( int j=0; j<n; j++ )
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		else if( a != null ) {
 			for( int i=rl; i<ru; i++ ) {
@@ -593,7 +601,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[aix + j];
 					if( aval != 0 || !sparseSafe)
-						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+						kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j));
 				}
 			}
 		}
@@ -602,7 +610,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private double executeDenseAggMxx(DenseBlock a, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//safe aggregation for min/max w/ handling of zero entries
 		//note: sparse safe with zero value as min/max handled outside
@@ -612,7 +620,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		if( a == null && !sparseSafe ) {
 			for( int i=rl; i<ru; i++ )
 				for( int j=0; j<n; j++ )
-					ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+					ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		else if( a != null ) {
 			for( int i=rl; i<ru; i++ ) {
@@ -621,7 +629,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for( int j=0; j<n; j++ ) {
 					double aval = avals[aix + j];
 					if( aval != 0 || !sparseSafe)
-						ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j));
+						ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, rix+i, i, j));
 				}
 			}
 		}
@@ -630,7 +638,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeSparseNoAggSparse(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//note: sequential scan algorithm for both sparse-safe and -unsafe
 		//in order to avoid binary search for sparse-unsafe
@@ -649,16 +657,16 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							c.append(i, j, genexec(0, b, scalars, m, n, i, j));
+							c.append(i, j, genexec(0, b, scalars, m, n, rix+i, i, j));
 					//process current non-zero
 					lastj = aix[k];
-					c.append(i, lastj, genexec(avals[k], b, scalars, m, n, i, lastj));
+					c.append(i, lastj, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					c.append(i, j, genexec(0, b, scalars, m, n, i, j));
+					c.append(i, j, genexec(0, b, scalars, m, n, rix+i, i, j));
 			lnnz += c.size(i);
 		}
 		
@@ -666,7 +674,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeSparseNoAggDense(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//note: sequential scan algorithm for both sparse-safe and -unsafe
 		//in order to avoid binary search for sparse-unsafe
@@ -686,10 +694,10 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+							lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, rix+i, i, j))!=0)?1:0;
 					//process current non-zero
 					lastj = aix[k];
-					lnnz += ((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
+					lnnz += ((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, rix+i, i, lastj))!=0)?1:0;
 				}
 			}
 			//process empty rows or remaining zeros
@@ -697,14 +705,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				for(int j=lastj+1; j<n; j++) {
 					double[] cvals = c.values(i);
 					int cix = c.pos(i);
-					lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+					lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, rix+i, i, j))!=0)?1:0;
 				}
 		}
 		return lnnz;
 	}
 	
 	private long executeSparseRowAggSum(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -726,23 +734,23 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 					//process current non-zero
 					lastj = aix[k];
-					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 			lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
 		}
 		return lnnz;
 	}
 	
 	private long executeSparseRowAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
 		ValueFunction vfun = getAggFunction();
@@ -764,23 +772,23 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+							tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j));
 					//process current non-zero
 					lastj = aix[k];
-					tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
+					tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+					tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j));
 			lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
 		}
 		return lnnz;
 	}
 
 	private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -803,14 +811,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++) {
 							kbuff.set(c[j], corr[j]);
-							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 							c[j] = kbuff._sum;
 							corr[j] = kbuff._correction;
 						}
 					//process current non-zero
 					lastj = aix[k];
 					kbuff.set(c[aix[k]], corr[aix[k]]);
-					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 					c[aix[k]] = kbuff._sum;
 					corr[aix[k]] = kbuff._correction;
 				}
@@ -819,7 +827,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++) {
 					kbuff.set(c[j], corr[j]);
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 					c[j] = kbuff._sum;
 					corr[j] = kbuff._correction;
 				}
@@ -828,7 +836,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeSparseColAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
 		ValueFunction vfun = getAggFunction();
@@ -850,26 +858,26 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++) {
-							c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j));
+							c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, rix+i, i, j));
 							count[j] ++;
 						}
 					//process current non-zero
 					lastj = aix[k];
-					c[aix[k]] = vfun.execute(c[aix[k]], genexec(avals[k], b, scalars, m, n, i, lastj));
+					c[aix[k]] = vfun.execute(c[aix[k]], genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 					count[aix[k]] ++;
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j));
+					c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		
 		return -1;
 	}
 	
 	private double executeSparseAggSum(SparseBlock sblock, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -888,22 +896,22 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 					//process current non-zero
 					lastj = aix[k];
-					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		return kbuff._sum;
 	}
 	
 	private double executeSparseAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
 		ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
@@ -923,22 +931,22 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+							ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j));
 					//process current non-zero
 					lastj = aix[k];
-					ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj));
+					ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+					ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j));
 		}
 		return ret;
 	}
 	
 	private long executeCompressedNoAgg(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		double[] c = (out.getDenseBlock() != null) ?
 			out.getDenseBlockValues() : null;
@@ -958,7 +966,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			if( out.isInSparseFormat() ) {
 				csblock.allocate(cell.getI());
 				csblock.append(cell.getI(), cell.getJ(), val);
@@ -971,7 +979,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeCompressedRowAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -979,7 +987,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			kbuff.set(c[cell.getI()], 0);
 			kplus.execute2(kbuff, val);
 			c[cell.getI()] = kbuff._sum;
@@ -990,7 +998,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeCompressedRowAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
 		ValueFunction vfun = getAggFunction();
@@ -998,7 +1006,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			c[cell.getI()] = vfun.execute(c[cell.getI()], val);
 		}
 		for( int i=rl; i<ru; i++ )
@@ -1007,7 +1015,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeCompressedColAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-		double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+		double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -1016,7 +1024,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			kbuff.set(c[cell.getJ()], corr[cell.getJ()]);
 			kplus.execute2(kbuff, val);
 			c[cell.getJ()] = kbuff._sum;
@@ -1026,7 +1034,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeCompressedColAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
 		ValueFunction vfun = getAggFunction();
@@ -1034,7 +1042,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			c[cell.getI()] = vfun.execute(c[cell.getI()], val);
 		}
 		for( int i=rl; i<ru; i++ )
@@ -1043,8 +1051,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private double executeCompressedAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
+		//TODO handle sequences in special case summation
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
 		KahanObject kbuff2 = new KahanObject(0, 0);
@@ -1073,7 +1082,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 			while( iter.hasNext() ) {
 				IJV cell = iter.next();
-				double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+				double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 				kplus.execute2(kbuff, val);
 			}
 		}
@@ -1081,7 +1090,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private double executeCompressedAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars,
-			int m, int n, boolean sparseSafe, int rl, int ru)
+			int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		//safe aggregation for min/max w/ handling of zero entries
 		//note: sparse safe with zero value as min/max handled outside
@@ -1091,14 +1100,21 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 			ret = vfun.execute(ret, val);
 		}
 		return ret;
 	}
 	
+	//local execution where grix==rix
+	protected final double genexec( double a, SideInput[] b,
+		double[] scalars, int m, int n, int rix, int cix) {
+		return genexec(a, b, scalars, m, n, rix, rix, cix);
+	}
+	
+	//distributed execution with additional global row index
 	protected abstract double genexec( double a, SideInput[] b,
-			double[] scalars, int m, int n, int rowIndex, int colIndex);
+		double[] scalars, int m, int n, long gix, int rix, int cix);
 	
 	private class ParAggTask implements Callable<Double> 
 	{
@@ -1126,11 +1142,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		@Override
 		public Double call() {
 			if( _a instanceof CompressedMatrixBlock )
-				return executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, _safe, _rl, _ru);
+				return executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0);
 			else if (!_a.isInSparseFormat())
-				return executeDenseAndAgg(_a.getDenseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru);
+				return executeDenseAndAgg(_a.getDenseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0);
 			else
-				return executeSparseAndAgg(_a.getSparseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru);
+				return executeSparseAndAgg(_a.getSparseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0);
 		}
 	}
 
@@ -1166,11 +1182,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				_c.allocateDenseBlock();
 			}
 			if( _a instanceof CompressedMatrixBlock )
-				return executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+				return executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0);
 			else if( !_a.isInSparseFormat() )
-				return executeDense(_a.getDenseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+				return executeDense(_a.getDenseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0);
 			else
-				return executeSparse(_a.getSparseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+				return executeSparse(_a.getSparseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0);
 		}
 		
 		public MatrixBlock getResult() {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
index 8db037e..68ca6ff 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
@@ -73,11 +73,15 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 	
 	@Override
 	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-		return execute(inputs, scalarObjects, out, 1);
+		return execute(inputs, scalarObjects, out, 1, 0);
 	}
 	
 	@Override
 	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
+		return execute(inputs, scalarObjects, out, k, 0);
+	}
+	
+	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) {
 		//sanity check
 		if( inputs==null || inputs.size() < 1  )
 			throw new RuntimeException("Invalid input arguments.");
@@ -104,11 +108,11 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 		if( k <= 1 ) //SINGLE-THREADED
 		{
 			if( inputs.get(0) instanceof CompressedMatrixBlock )
-				executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, m);
+				executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, m, rix);
 			else if( !inputs.get(0).isInSparseFormat() )
-				executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m);
+				executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix);
 			else
-				executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m);
+				executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix);
 		}
 		else  //MULTI-THREADED
 		{
@@ -141,7 +145,7 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 		return out;
 	}
 	
-	private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+	private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
 	{
 		SideInput[] lb = createSparseSideInputs(b);
 		
@@ -149,20 +153,20 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 		if( a == null && !sparseSafe ) {
 			for( int i=rl; i<ru; i++ )
 				for( int j=0; j<n; j++ )
-					genexec( 0, lb, scalars, c, m, n, i, j );
+					genexec( 0, lb, scalars, c, m, n, rix+i, i, j );
 		}
 		else if( a != null ) {
 			for( int i=rl; i<ru; i++ ) { 
 				double[] avals = a.values(i);
 				int aix = a.pos(i);
 				for( int j=0; j<n; j++ )
-					genexec( avals[aix+j], lb, scalars, c, m, n, i, j );
+					genexec( avals[aix+j], lb, scalars, c, m, n, rix+i, i, j );
 			}
 		}
 	}
 	
 	private void executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars,
-			double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) 
 	{
 		if( sblock == null && sparseSafe )
 			return;
@@ -183,30 +187,38 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 					//process zeros before current non-zero
 					if( !sparseSafe )
 						for(int j=lastj+1; j<aix[k]; j++)
-							genexec(0, lb, scalars, c, m, n, i, j);
+							genexec(0, lb, scalars, c, m, n, rix+i, i, j);
 					//process current non-zero
 					lastj = aix[k];
-					genexec(avals[k], lb, scalars, c, m, n, i, lastj);
+					genexec(avals[k], lb, scalars, c, m, n, rix+i, i, lastj);
 				}
 			}
 			//process empty rows or remaining zeros
 			if( !sparseSafe )
 				for(int j=lastj+1; j<n; j++)
-					genexec(0, lb, scalars, c, m, n, i, j);
+					genexec(0, lb, scalars, c, m, n, rix+i, i, j);
 		}
 	}
 
-	private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rl, int ru)
+	private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rl, int ru, long rix)
 	{
 		//core compressed aggregation operation
 		Iterator<IJV> iter = a.getIterator(rl, ru, true);
 		while( iter.hasNext() ) {
 			IJV cell = iter.next();
-			genexec(cell.getV(), b, scalars, c, m, n, cell.getI(), cell.getJ());
+			genexec(cell.getV(), b, scalars, c, m, n, rix+cell.getI(), cell.getI(), cell.getJ());
 		}
 	}
 	
-	protected abstract void genexec( double a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rowIndex, int colIndex);
+	//local execution where grix==rix
+	protected final void genexec( double a, SideInput[] b,
+		double[] scalars, double[] c, int m, int n, int rix, int cix) {
+		genexec(a, b, scalars, c, m, n, rix, rix, cix);
+	}
+	
+	//distributed execution with additional global row index
+	protected abstract void genexec( double a, SideInput[] b,
+		double[] scalars, double[] c, int m, int n, long grix, int rix, int cix);
 	
 	
 	private void setInitialOutputValues(double[] c) {
@@ -304,11 +316,11 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
 			double[] c = new double[_aggOps.length];
 			setInitialOutputValues(c);
 			if( _a instanceof CompressedMatrixBlock )
-				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _rlen, _clen, _rl, _ru);
+				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _rlen, _clen, _rl, _ru, 0);
 			else if( !_a.isInSparseFormat() )
-				executeDense(_a.getDenseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru);
+				executeDense(_a.getDenseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru, 0);
 			else
-				executeSparse(_a.getSparseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru);
+				executeSparse(_a.getSparseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru, 0);
 			return c;
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 7454f78..507bd65 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -118,10 +118,10 @@ public abstract class SpoofRowwise extends SpoofOperator
 	
 	@Override
 	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-		return execute(inputs, scalarObjects, out, true, false);
+		return execute(inputs, scalarObjects, out, true, false, 0);
 	}
 	
-	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr) {
+	public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr, long rix) {
 		//sanity check
 		if( inputs==null || inputs.size() < 1 || out==null )
 			throw new RuntimeException("Invalid input arguments.");
@@ -149,11 +149,11 @@ public abstract class SpoofRowwise extends SpoofOperator
 		//core sequential execute
 		MatrixBlock a = inputs.get(0);
 		if( a instanceof CompressedMatrixBlock )
-			executeCompressed((CompressedMatrixBlock)a, b, scalars, c, n, 0, m);
+			executeCompressed((CompressedMatrixBlock)a, b, scalars, c, n, 0, m, rix);
 		else if( !a.isInSparseFormat() )
-			executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m);
+			executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m, rix);
 		else
-			executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m);
+			executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m, rix);
 		
 		//post-processing
 		if( allocTmp &&_reqVectMem > 0 )
@@ -285,39 +285,39 @@ public abstract class SpoofRowwise extends SpoofOperator
 		out.setNonZeros(out.getNumRows()*out.getNumColumns());
 	}
 	
-	private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) {
+	private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) {
 		//forward empty block to sparse
 		if( a == null ) {
-			executeSparse(null, b, scalars, c, n, rl, ru);
+			executeSparse(null, b, scalars, c, n, rl, ru, rix);
 			return;
 		}
 		
 		SideInput[] lb = createSparseSideInputs(b, true);
 		for( int i=rl; i<ru; i++ ) {
 			genexec(a.values(i), a.pos(i), lb, scalars,
-				c.values(i), c.pos(i), n, i );
+				c.values(i), c.pos(i), n, rix+i, i );
 		}
 	}
 	
-	private void executeSparse(SparseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) {
+	private void executeSparse(SparseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) {
 		SideInput[] lb = createSparseSideInputs(b, true);
 		SparseRow empty = new SparseRowVector(1);
 		for( int i=rl; i<ru; i++ ) {
 			if( a!=null && !a.isEmpty(i) ) {
 				//call generated method
 				genexec(a.values(i), a.indexes(i), a.pos(i), lb, scalars,
-					c.values(i), c.pos(i), a.size(i), n, i);
+					c.values(i), c.pos(i), a.size(i), n, rix+i, i);
 			}
 			else
 				genexec(empty.values(), empty.indexes(), 0, lb, scalars,
-					c.values(i), c.pos(i), 0, n, i);
+					c.values(i), c.pos(i), 0, n, rix+i, i);
 		}
 	}
 	
-	private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) {
+	private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) {
 		//forward empty block to sparse
 		if( a.isEmptyBlock(false) ) {
-			executeSparse(null, b, scalars, c, n, rl, ru);
+			executeSparse(null, b, scalars, c, n, rl, ru, rix);
 			return;
 		}
 		
@@ -331,11 +331,23 @@ public abstract class SpoofRowwise extends SpoofOperator
 	
 	//methods to be implemented by generated operators of type SpoofRowAggrgate 
 	
+	//local execution where grix==rix (global row index equals the block-local row index)
+	protected final void genexec(double[] a, int ai, 
+		SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) {
+		genexec(a, ai, b, scalars, c, ci, len, rix, rix);
+	}
+	
+	protected final void genexec(double[] avals, int[] aix, int ai, 
+		SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, int rix) {
+		genexec(avals, aix, ai, b, scalars, c, ci, alen, n, rix, rix);
+	}
+	
+	//distributed execution with additional global row index
 	protected abstract void genexec(double[] a, int ai, 
-		SideInput[] b, double[] scalars, double[] c, int ci, int len, int rowIndex);
+		SideInput[] b, double[] scalars, double[] c, int ci, int len, long grix, int rix);
 	
 	protected abstract void genexec(double[] avals, int[] aix, int ai, 
-		SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, int rowIndex);
+		SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, long grix, int rix);
 
 	
 	/**
@@ -369,11 +381,11 @@ public abstract class SpoofRowwise extends SpoofOperator
 			DenseBlock c = DenseBlockFactory.createDenseBlock(1, _outLen);
 			
 			if( _a instanceof CompressedMatrixBlock )
-				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru);
+				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru, 0);
 			else if( !_a.isInSparseFormat() )
-				executeDense(_a.getDenseBlock(), _b, _scalars, c, _clen, _rl, _ru);
+				executeDense(_a.getDenseBlock(), _b, _scalars, c, _clen, _rl, _ru, 0);
 			else
-				executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru);
+				executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru, 0);
 			
 			if( _reqVectMem > 0 )
 				LibSpoofPrimitives.cleanupThreadLocalMemory();
@@ -413,11 +425,11 @@ public abstract class SpoofRowwise extends SpoofOperator
 				LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
 			
 			if( _a instanceof CompressedMatrixBlock )
-				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
 			else if( !_a.isInSparseFormat() )
-				executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+				executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
 			else
-				executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+				executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
 			
 			if( _reqVectMem > 0 )
 				LibSpoofPrimitives.cleanupThreadLocalMemory();