You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/22 20:52:25 UTC

[1/5] systemml git commit: [MINOR] Performance and cleanup ctable result extraction

Repository: systemml
Updated Branches:
  refs/heads/master fec209306 -> 856230c56


[MINOR] Performance and cleanup ctable result extraction

This patch cleans up the ctable result extraction by avoiding the
unnecessary materialization of result cells as a list, in order to
improve memory efficiency and performance.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c6679b7b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c6679b7b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c6679b7b

Branch: refs/heads/master
Commit: c6679b7b890f2a4e4553988c9a043ef06cd8e9f4
Parents: fec2093
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Jul 20 21:36:22 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 22 13:53:14 2017 -0700

----------------------------------------------------------------------
 .../spark/TernarySPInstruction.java             |  8 ++-
 .../sysml/runtime/matrix/data/CTableMap.java    | 23 ++++----
 .../runtime/matrix/mapred/GMRCtableBuffer.java  |  9 ++--
 .../runtime/util/LongLongDoubleHashMap.java     | 56 ++++++++++++++------
 .../functions/sparse/SparseBlockAppendSort.java |  8 ++-
 .../functions/sparse/SparseBlockGetSet.java     |  8 ++-
 6 files changed, 74 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java
index a25dcf0..8bb62dc 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/TernarySPInstruction.java
@@ -459,18 +459,16 @@ public class TernarySPInstruction extends ComputationSPInstruction
 
 		private static final long serialVersionUID = -5933677686766674444L;
 		
-		@SuppressWarnings("deprecation")
 		@Override
 		public Iterator<Tuple2<MatrixIndexes, Double>> call(CTableMap ctableMap)
 				throws Exception {
 			ArrayList<Tuple2<MatrixIndexes, Double>> retVal = new ArrayList<Tuple2<MatrixIndexes, Double>>();
-			
-			for(LLDoubleEntry ijv : ctableMap.entrySet()) {
+			Iterator<LLDoubleEntry> iter = ctableMap.getIterator();
+			while( iter.hasNext() ) {
+				LLDoubleEntry ijv = iter.next();
 				long i = ijv.key1;
 				long j =  ijv.key2;
 				double v =  ijv.value;
-				
-				// retVal.add(new Tuple2<MatrixIndexes, MatrixCell>(blockIndexes, cell));
 				retVal.add(new Tuple2<MatrixIndexes, Double>(new MatrixIndexes(i, j), v));
 			}
 			return retVal.iterator();

http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java
index 2a11882..f93bace 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/CTableMap.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
-import java.util.ArrayList;
+import java.util.Iterator;
 
 import org.apache.sysml.runtime.util.LongLongDoubleHashMap;
 import org.apache.sysml.runtime.util.LongLongDoubleHashMap.LLDoubleEntry;
@@ -43,15 +43,12 @@ public class CTableMap
 		_maxCol = -1;
 	}
 
-	public int size() 
-	{
+	public int size() {
 		return _map.size();
 	}
-
-	@Deprecated
-	public ArrayList<LLDoubleEntry> entrySet()
-	{
-		return _map.extractValues();
+	
+	public Iterator<LLDoubleEntry> getIterator() {
+		return _map.getIterator();
 	}
 
 	public long getMaxRow() {
@@ -83,8 +80,9 @@ public class CTableMap
 		if( sparse ) //SPARSE <- cells
 		{
 			//append cells to sparse target (prevent shifting)
-			for( LLDoubleEntry e : _map.extractValues() ) 
-			{
+			Iterator<LLDoubleEntry> iter2 = _map.getIterator();
+			while( iter2.hasNext() ) {
+				LLDoubleEntry e = iter2.next();
 				double value = e.value;
 				int rix = (int)e.key1;
 				int cix = (int)e.key2;
@@ -98,8 +96,9 @@ public class CTableMap
 		else  //DENSE <- cells
 		{
 			//directly insert cells into dense target 
-			for( LLDoubleEntry e : _map.extractValues() ) 
-			{
+			Iterator<LLDoubleEntry> iter = _map.getIterator();
+			while( iter.hasNext() ) {
+				LLDoubleEntry e = iter.next();
 				double value = e.value;
 				int rix = (int)e.key1;
 				int cix = (int)e.key2;

http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java
index d01145f..d5c00de 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/GMRCtableBuffer.java
@@ -22,6 +22,7 @@ package org.apache.sysml.runtime.matrix.mapred;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map.Entry;
 
 import org.apache.hadoop.mapred.Reporter;
@@ -105,7 +106,6 @@ public class GMRCtableBuffer
 		return _blockBuffer;
 	}
 
-	@SuppressWarnings("deprecation")
 	public void flushBuffer( Reporter reporter ) 
 		throws RuntimeException 
 	{
@@ -129,12 +129,13 @@ public class GMRCtableBuffer
 					}
 					
 					//output result data 
-					for(LLDoubleEntry e: resultMap.entrySet()) {
+					Iterator<LLDoubleEntry> iter = resultMap.getIterator();
+					while( iter.hasNext() ) {
+						LLDoubleEntry e = iter.next();
 						key = new MatrixIndexes(e.key1, e.key2);
 						value.setValue(e.value);
-						for(Integer i: resultIDs) {
+						for(Integer i: resultIDs)
 							_collector.collectOutput(key, value, i, reporter);
-						}
 					}
 				}
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java
index d8c8011..4c1cc0e 100644
--- a/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java
+++ b/src/main/java/org/apache/sysml/runtime/util/LongLongDoubleHashMap.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysml.runtime.util;
 
-import java.util.ArrayList;
+import java.util.Iterator;
 
 /**
  * This native long long - double hashmap is specifically designed for
@@ -73,20 +73,8 @@ public class LongLongDoubleHashMap
 			resize();
 	}
 
-	public ArrayList<LLDoubleEntry> extractValues()
-	{
-		ArrayList<LLDoubleEntry> ret = new ArrayList<LLDoubleEntry>();
-		for( LLDoubleEntry e : data ) {
-			if( e != null ) {
-				while( e.next!=null ) {
-					ret.add(e);
-					e = e.next;
-				}
-				ret.add(e);	
-			}
-		}
-
-		return ret;
+	public Iterator<LLDoubleEntry> getIterator() {
+		return new LLDoubleEntryIterator();
 	}
 
 	private void resize() {
@@ -138,4 +126,42 @@ public class LongLongDoubleHashMap
 			next = null;
 		}
 	}
+	
+	private class LLDoubleEntryIterator implements Iterator<LLDoubleEntry> {
+		private LLDoubleEntry _curr;
+		private int _currPos;
+		
+		public LLDoubleEntryIterator() {
+			_curr = null;
+			_currPos = -1;
+			findNext();
+		}
+		
+		@Override
+		public boolean hasNext() {
+			return (_curr != null);
+		}
+
+		@Override
+		public LLDoubleEntry next() {
+			LLDoubleEntry ret = _curr;
+			findNext();
+			return ret;
+		}
+		
+		private void findNext() {
+			if( _curr != null && _curr.next != null ) {
+				_curr = _curr.next;
+				return;
+			}
+			_currPos++;
+			while( _currPos < data.length  ) {
+				_curr = data[_currPos];
+				if( _curr != null ) 
+					return;
+				_currPos++;
+			}
+			_curr = null;
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java
index a563fde..4705e4a 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockAppendSort.java
@@ -21,6 +21,9 @@ package org.apache.sysml.test.integration.functions.sparse;
 
 import org.junit.Assert;
 import org.junit.Test;
+
+import java.util.Iterator;
+
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlockCOO;
 import org.apache.sysml.runtime.matrix.data.SparseBlockCSR;
@@ -175,8 +178,11 @@ public class SparseBlockAppendSort extends AutomatedTestBase
 				for( int i=0; i<rows; i++ )
 					for( int j=0; j<cols; j++ )
 						map.addValue(i, j, A[i][j]);
-				for( LLDoubleEntry e : map.extractValues() ) //random hash order
+				Iterator<LLDoubleEntry> iter = map.getIterator();
+				while( iter.hasNext() ) { //random hash order
+					LLDoubleEntry e = iter.next();
 					sblock.append((int)e.key1, (int)e.key2, e.value);
+				}
 			}	
 			
 			//sort appended values

http://git-wip-us.apache.org/repos/asf/systemml/blob/c6679b7b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java
index be160c9..6f84637 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockGetSet.java
@@ -21,6 +21,9 @@ package org.apache.sysml.test.integration.functions.sparse;
 
 import org.junit.Assert;
 import org.junit.Test;
+
+import java.util.Iterator;
+
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlockCOO;
@@ -233,8 +236,11 @@ public class SparseBlockGetSet extends AutomatedTestBase
 					for( int i=0; i<rows; i++ )
 						for( int j=0; j<cols; j++ )
 							map.addValue(i, j, A[i][j]);
-					for( LLDoubleEntry e : map.extractValues() ) //random hash order
+					Iterator<LLDoubleEntry> iter = map.getIterator();
+					while( iter.hasNext() ) { //random hash order
+						LLDoubleEntry e = iter.next();
 						sblock.set((int)e.key1, (int)e.key2, e.value);
+					}
 				}	
 			}
 			


[3/5] systemml git commit: [SYSTEMML-1788] Extended codegen cell-wise ops (column aggregation)

Posted by mb...@apache.org.
[SYSTEMML-1788] Extended codegen cell-wise ops (column aggregation)

This patch extends the code generator's cell-wise template
(compiler/runtime) with column aggregations for sum, sumsq, min, and max.
Although row-wise templates also cover column aggregations with sum,
the cell-wise template is more efficient (if no row aggregations are
required) because it does not materialize vector intermediates and
hence is more cache-friendly for large numbers of columns.

Furthermore, this patch also includes some minor cleanups of the core
matrix block data structure and operations.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/45367829
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/45367829
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/45367829

Branch: refs/heads/master
Commit: 45367829a9b47dfbacaa5631770453ee76c3a1e9
Parents: d7e4c5a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Jul 21 21:04:00 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 22 13:53:16 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/cplan/CNodeCell.java     |   1 +
 .../hops/codegen/template/TemplateCell.java     |   7 +-
 .../hops/codegen/template/TemplateUtils.java    |  12 +-
 .../sysml/runtime/codegen/SpoofCellwise.java    | 243 ++++++++++++++++++-
 .../instructions/spark/SpoofSPInstruction.java  |  16 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  56 ++---
 .../functions/codegen/CellwiseTmplTest.java     |  44 +++-
 .../functions/codegen/RowAggTmplTest.java       |   2 +-
 .../scripts/functions/codegen/cellwisetmpl15.R  |  31 +++
 .../functions/codegen/cellwisetmpl15.dml        |  27 +++
 .../scripts/functions/codegen/cellwisetmpl16.R  |  30 +++
 .../functions/codegen/cellwisetmpl16.dml        |  27 +++
 .../scripts/functions/codegen/rowAggPattern14.R |   2 +-
 .../functions/codegen/rowAggPattern14.dml       |   2 +-
 14 files changed, 433 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
index 062e9a0..36cf56f 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
@@ -148,6 +148,7 @@ public class CNodeCell extends CNodeTpl
 		switch( _type ) {
 			case NO_AGG: return SpoofOutputDimsType.INPUT_DIMS;
 			case ROW_AGG: return SpoofOutputDimsType.ROW_DIMS;
+			case COL_AGG: return SpoofOutputDimsType.COLUMN_DIMS_COLS;
 			case FULL_AGG: return SpoofOutputDimsType.SCALAR;
 			default:
 				throw new RuntimeException("Unsupported cell type: "+_type.toString());

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
index c73216e..68f7412 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
@@ -31,7 +31,6 @@ import org.apache.sysml.hops.DataOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
-import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
 import org.apache.sysml.hops.IndexingOp;
@@ -82,8 +81,7 @@ public class TemplateCell extends TemplateBase
 	@Override
 	public boolean fuse(Hop hop, Hop input) {
 		return !isClosed() && (isValidOperation(hop) 
-			|| (HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_AGG) 
-				&& ((AggUnaryOp) hop).getDirection()!= Direction.Col)
+			|| HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_AGG)
 			|| (HopRewriteUtils.isMatrixMultiply(hop)
 				&& hop.getDim1()==1 && hop.getDim2()==1)
 				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
@@ -102,8 +100,7 @@ public class TemplateCell extends TemplateBase
 	@Override
 	public CloseType close(Hop hop) {
 		//need to close cell tpl after aggregation, see fuse for exact properties
-		if( (HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_AGG) 
-				&& ((AggUnaryOp) hop).getDirection()!= Direction.Col)
+		if( HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_AGG)
 			|| (HopRewriteUtils.isMatrixMultiply(hop) && hop.getDim1()==1 && hop.getDim2()==1) )
 			return CloseType.CLOSED_VALID;
 		else if( hop instanceof AggUnaryOp || hop instanceof AggBinaryOp )

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index 647c9d3..402f9fe 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -167,9 +167,15 @@ public class TemplateUtils
 	}
 	
 	public static CellType getCellType(Hop hop) {
-		return (hop instanceof AggBinaryOp) ? CellType.FULL_AGG :
-			(hop instanceof AggUnaryOp) ? ((((AggUnaryOp) hop).getDirection() == Direction.RowCol) ? 
-			CellType.FULL_AGG : CellType.ROW_AGG) : CellType.NO_AGG;
+		if( hop instanceof AggBinaryOp )
+			return CellType.FULL_AGG;
+		else if( hop instanceof AggUnaryOp )
+			switch( ((AggUnaryOp)hop).getDirection() ) {
+				case Row: return CellType.ROW_AGG;
+				case Col: return CellType.COL_AGG;
+				case RowCol: return CellType.FULL_AGG;
+			}
+		return CellType.NO_AGG;
 	}
 	
 	public static RowType getRowType(Hop output, Hop... inputs) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 15de508..08032af 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -43,6 +43,7 @@ import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.data.IJV;
+import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
 import org.apache.sysml.runtime.util.UtilFunctions;
@@ -56,6 +57,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		NO_AGG,
 		FULL_AGG,
 		ROW_AGG,
+		COL_AGG,
 	}
 	
 	//redefinition of Hop.AggOp for cleaner imports in generate class
@@ -208,10 +210,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				&& genexec( 0, b, scalars, m, n, 0, 0 ) == 0);
 		
 		//result allocation and preparations
-		boolean sparseOut = sparseSafe && a.isInSparseFormat()
-				&& _type == CellType.NO_AGG;
-		out.reset(a.getNumRows(), _type == CellType.NO_AGG ?
-				a.getNumColumns() : 1, sparseOut);
+		boolean sparseOut = _type == CellType.NO_AGG
+			&& sparseSafe && a.isInSparseFormat();
+		switch( _type ) {
+			case NO_AGG: out.reset(m, n, sparseOut); break;
+			case ROW_AGG: out.reset(m, 1, false); break;
+			case COL_AGG: out.reset(1, n, false); break;
+			default: throw new DMLRuntimeException("Invalid cell type: "+_type);
+		}
 		out.allocateDenseOrSparseBlock();
 		
 		long lnnz = 0;
@@ -244,6 +250,23 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				//aggregate nnz and error handling
 				for( Future<Long> task : taskret )
 					lnnz += task.get();
+				if( _type == CellType.COL_AGG ) {
+					//aggregate partial results
+					double[] c = out.getDenseBlock();
+					ValueFunction vfun = getAggFunction();
+					if( vfun instanceof KahanFunction ) {
+						for( ParExecTask task : tasks )
+							LibMatrixMult.vectAdd(task.getResult().getDenseBlock(), c, 0, 0, n);
+					}
+					else {
+						for( ParExecTask task : tasks ) {
+							double[] tmp = task.getResult().getDenseBlock();
+							for(int j=0; j<n; j++)
+								c[j] = vfun.execute(c[j], tmp[j]);
+						}
+					}
+					lnnz = out.recomputeNonZeros();
+				}
 			}
 			catch(Exception ex) {
 				throw new DMLRuntimeException(ex);
@@ -273,6 +296,12 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			else
 				return executeDenseRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
 		}
+		else if( _type == CellType.COL_AGG ) {
+			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+				return executeDenseColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+			else
+				return executeDenseColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+		}
 		return -1;
 	}
 	
@@ -305,6 +334,12 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			else
 				return executeSparseRowAggMxx(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
 		}
+		else if( _type == CellType.COL_AGG ) {
+			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+				return executeSparseColAggSum(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
+			else
+				return executeSparseColAggMxx(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
+		}
 		
 		return -1;
 	}
@@ -339,6 +374,13 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			else
 				return executeCompressedRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
 		}
+		else if( _type == CellType.COL_AGG ) {
+			double[] c = out.getDenseBlock();
+			if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+				return executeCompressedColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+			else
+				return executeCompressedColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+		}
 		return -1;
 	}
 	
@@ -372,8 +414,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	}
 	
 	private long executeDenseRowAggSum(double[] a, SideInput[] b, double[] scalars, 
-			double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
-		throws DMLRuntimeException 
+		double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
 	{
 		KahanFunction kplus = (KahanFunction) getAggFunction();
 		KahanObject kbuff = new KahanObject(0, 0);
@@ -401,7 +442,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			for( int i=rl; i<ru; i++ ) { 
 				double tmp = initialVal;
 				for( int j=0; j<n; j++ )
-					tmp = vfun.execute(tmp, genexec( 0, b, scalars, m, n, i, j ));
+					tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
 				lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
 			}
 		}
@@ -410,7 +451,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				double tmp = initialVal;
 				for( int j=0; j<n; j++, ix++ )
 					if( a[ix] != 0 || !sparseSafe)
-						tmp = vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j ));
+						tmp = vfun.execute(tmp, genexec(a[ix], b, scalars, m, n, i, j));
 				if( sparseSafe && UtilFunctions.containsZero(a, ix-n, n) )
 					tmp = vfun.execute(tmp, 0);
 				lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
@@ -419,6 +460,55 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		return lnnz;
 	}
 	
+	private long executeDenseColAggSum(double[] a, SideInput[] b, double[] scalars, 
+		double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+	{
+		KahanFunction kplus = (KahanFunction) getAggFunction();
+		KahanObject kbuff = new KahanObject(0, 0);
+		double[] corr = new double[n];
+		
+		for( int i=rl, ix=rl*n; i<ru; i++ )
+			for( int j=0; j<n; j++, ix++ ) {
+				double aval = (a != null) ? a[ix] : 0;
+				if( aval != 0 || !sparseSafe) {
+					kbuff.set(c[j], corr[j]);
+					kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+					c[j] = kbuff._sum;
+					corr[j] = kbuff._correction;
+				}
+			}
+		return -1;
+	}
+	
+	private long executeDenseColAggMxx(double[] a, SideInput[] b, double[] scalars, 
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+		ValueFunction vfun = getAggFunction();
+		Arrays.fill(c, initialVal);
+		
+		if( a == null && !sparseSafe ) { //empty
+			for( int i=rl; i<ru; i++ )
+				for( int j=0; j<n; j++ )
+					c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j));
+		}
+		else if( a != null ) { //general case
+			int[] counts = new int[n];
+			for( int i=rl, ix=rl*n; i<ru; i++ )
+				for( int j=0; j<n; j++, ix++ )
+					if( a[ix] != 0 || !sparseSafe) {
+						c[j] = vfun.execute(c[j], genexec(a[ix], b, scalars, m, n, i, j));
+						counts[j] ++;
+					}
+			if( sparseSafe )
+				for(int j=0; j<n; j++)
+					if( counts[j] != ru-rl )
+						c[j] = vfun.execute(c[j], 0);
+		}
+		return -1;
+	}
+	
 	private double executeDenseAggSum(double[] a, SideInput[] b, double[] scalars, 
 			int m, int n, boolean sparseSafe, int rl, int ru) 
 		throws DMLRuntimeException 
@@ -601,6 +691,97 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		}
 		return lnnz;
 	}
+
+	private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, 
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		KahanFunction kplus = (KahanFunction) getAggFunction();
+		KahanObject kbuff = new KahanObject(0, 0);
+		double[] corr = new double[n];
+		
+		//note: sequential scan algorithm for both sparse-safe and -unsafe 
+		//in order to avoid binary search for sparse-unsafe 
+		double[] c = out.getDenseBlock();
+		for(int i=rl; i<ru; i++) {
+			kbuff.set(0, 0);
+			int lastj = -1;
+			//handle non-empty rows
+			if( sblock != null && !sblock.isEmpty(i) ) {
+				int apos = sblock.pos(i);
+				int alen = sblock.size(i);
+				int[] aix = sblock.indexes(i);
+				double[] avals = sblock.values(i);
+				for(int k=apos; k<apos+alen; k++) {
+					//process zeros before current non-zero
+					if( !sparseSafe )
+						for(int j=lastj+1; j<aix[k]; j++) {
+							kbuff.set(c[aix[j]], corr[aix[j]]);
+							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+							c[aix[j]] = kbuff._sum;
+							corr[aix[j]] = kbuff._correction;
+						}
+					//process current non-zero
+					lastj = aix[k];
+					kbuff.set(c[aix[k]], corr[aix[k]]);
+					kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+					c[aix[k]] = kbuff._sum;
+					corr[aix[k]] = kbuff._correction;
+				}
+			}
+			//process empty rows or remaining zeros
+			if( !sparseSafe )
+				for(int j=lastj+1; j<n; j++) {
+					kbuff.set(c[j], corr[j]);
+					kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					c[j] = kbuff._sum;
+					corr[j] = kbuff._correction;
+				}
+		}
+		return -1;
+	}
+	
+	private long executeSparseColAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars, 
+			MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+		ValueFunction vfun = getAggFunction();
+		double[] c = out.getDenseBlock();
+		Arrays.fill(c, initialVal);
+		int[] count = new int[n];
+		
+		//note: sequential scan algorithm for both sparse-safe and -unsafe 
+		//in order to avoid binary search for sparse-unsafe 
+		for(int i=rl; i<ru; i++) {
+			int lastj = -1;
+			//handle non-empty rows
+			if( sblock != null && !sblock.isEmpty(i) ) {
+				int apos = sblock.pos(i);
+				int alen = sblock.size(i);
+				int[] aix = sblock.indexes(i);
+				double[] avals = sblock.values(i);
+				for(int k=apos; k<apos+alen; k++) {
+					//process zeros before current non-zero
+					if( !sparseSafe )
+						for(int j=lastj+1; j<aix[k]; j++) {
+							c[aix[j]] = vfun.execute(c[aix[j]], genexec(0, b, scalars, m, n, i, j));
+							count[aix[j]] ++;
+						}
+					//process current non-zero
+					lastj = aix[k];
+					c[aix[k]] = vfun.execute(c[aix[k]], genexec(avals[k], b, scalars, m, n, i, lastj));
+					count[aix[k]] ++;
+				}
+			}
+			//process empty rows or remaining zeros
+			if( !sparseSafe )
+				for(int j=lastj+1; j<n; j++)
+					c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j));
+		}
+		
+		return -1;
+	}
 	
 	private double executeSparseAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, 
 			int m, int n, boolean sparseSafe, int rl, int ru) 
@@ -744,6 +925,43 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		return lnnz;
 	}
 	
+	private long executeCompressedColAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, 
+		double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+	{
+		KahanFunction kplus = (KahanFunction) getAggFunction();
+		KahanObject kbuff = new KahanObject(0, 0);
+		double[] corr = new double[n];
+		
+		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
+		while( iter.hasNext() ) {
+			IJV cell = iter.next();
+			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			kbuff.set(c[cell.getJ()], corr[cell.getJ()]);
+			kplus.execute2(kbuff, val);
+			c[cell.getJ()] = kbuff._sum;
+			corr[cell.getJ()] = kbuff._correction;
+		}
+		return -1;
+	}
+	
+	private long executeCompressedColAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars, 
+			double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE);
+		ValueFunction vfun = getAggFunction();
+		long lnnz = 0;
+		Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
+		while( iter.hasNext() ) {
+			IJV cell = iter.next();
+			double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ());
+			c[cell.getI()] = vfun.execute(c[cell.getI()], val);
+		}
+		for( int i=rl; i<ru; i++ )
+			lnnz += (c[i]!=0) ? 1 : 0;
+		return lnnz;
+	}
+	
 	private double executeCompressedAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, 
 			int m, int n, boolean sparseSafe, int rl, int ru) 
 		throws DMLRuntimeException 
@@ -820,7 +1038,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		private final MatrixBlock _a;
 		private final SideInput[] _b;
 		private final double[] _scalars;
-		private final MatrixBlock _c;
+		private MatrixBlock _c;
 		private final int _rlen;
 		private final int _clen;
 		private final boolean _safe;
@@ -842,12 +1060,17 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		
 		@Override
 		public Long call() throws DMLRuntimeException {
+			_c = (_type==CellType.COL_AGG)? new MatrixBlock(1,_clen, false) : _c;
 			if( _a instanceof CompressedMatrixBlock )
 				return executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
 			else if( !_a.isInSparseFormat() )
 				return executeDense(_a.getDenseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
 			else
-				return executeSparse(_a.getSparseBlock(), _b, _scalars,  _c, _rlen, _clen, _safe, _rl, _ru);
+				return executeSparse(_a.getSparseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+		}
+		
+		public MatrixBlock getResult() {
+			return _c;
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
index 1d360a1..eae5560 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
@@ -128,11 +128,17 @@ public class SpoofSPInstruction extends SPInstruction
 			AggregateOperator aggop = getAggregateOperator(op.getAggOp());
 			
 			if( _out.getDataType()==DataType.MATRIX ) {
-				out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcMatrices, scalars), true);
-				if( op.getCellType()==CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock() ) {
+				//execute codegen block operation
+				out = in.mapPartitionsToPair(new CellwiseFunction(
+					_class.getName(), _classBytes, bcMatrices, scalars), true);
+				
+				if( (op.getCellType()==CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock())
+					|| (op.getCellType()==CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
 					//TODO investigate if some other side effect of correct blocks
-					if( out.partitions().size() > mcIn.getNumRowBlocks() )
-						out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int)mcIn.getNumRowBlocks(), false);
+					long numBlocks = (op.getCellType()==CellType.ROW_AGG ) ? 
+						mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
+					if( out.partitions().size() > numBlocks )
+						out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int)numBlocks, false);
 					else
 						out = RDDAggregateUtils.aggByKeyStable(out, aggop, false);
 				}
@@ -405,6 +411,8 @@ public class SpoofSPInstruction extends SPInstruction
 				else {
 					if(((SpoofCellwise)_op).getCellType()==CellType.ROW_AGG)
 						ixOut = new MatrixIndexes(ixOut.getRowIndex(), 1);
+					else if(((SpoofCellwise)_op).getCellType()==CellType.COL_AGG)
+						ixOut = new MatrixIndexes(1, ixOut.getColumnIndex());
 					_op.execute(inputs, _scalars, blkOut);
 				}
 				ret.add(new Tuple2<MatrixIndexes,MatrixBlock>(ixOut, blkOut));

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 835e491..d5306cb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -36,7 +36,6 @@ import org.apache.commons.math3.random.Well1024a;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.lops.MMTSJ.MMTSJType;
 import org.apache.sysml.lops.MapMultChain.ChainType;
@@ -89,7 +88,6 @@ import org.apache.sysml.runtime.util.IndexRange;
 import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.utils.GPUStatistics;
 import org.apache.sysml.utils.NativeHelper;
-import org.apache.sysml.utils.Statistics;
 
 
 
@@ -110,9 +108,6 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	//basic header (int rlen, int clen, byte type)
 	public static final int HEADER_SIZE = 9;
 	
-	//internal stats flag for matrix block internals //TODO remove
-	private static final boolean DISPLAY_STATISTICS = false; 
-	
 	public enum BlockType{
 		EMPTY_BLOCK,  
 		ULTRA_SPARSE_BLOCK, //ultra sparse representation, in-mem same as sparse
@@ -324,17 +319,12 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		recomputeNonZeros();
 	}
 
-	public boolean isAllocated()
-	{
-		if( sparse )
-			return (sparseBlock!=null);
-		else
-			return (denseBlock!=null);
+	public boolean isAllocated() {
+		return sparse ? (sparseBlock!=null)
+			: (denseBlock!=null);
 	}
 
-	public void allocateDenseBlock() 
-		throws RuntimeException 
-	{
+	public void allocateDenseBlock() {
 		allocateDenseBlock( true );
 	}
 
@@ -345,24 +335,22 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			allocateDenseBlock();
 	}
 	
-	@SuppressWarnings("unused")
 	public void allocateDenseBlock(boolean clearNNZ) 
-			throws RuntimeException 
 	{
 		long limit = (long)rlen * clen;
 		
 		//check max size constraint (16GB dense), since java arrays are limited to 2^(32-1) elements)
 		if( limit > Integer.MAX_VALUE ) {
 			String execType = OptimizerUtils.isSparkExecutionMode() ? "SPARK" : "MR";
-			throw new RuntimeException("Dense in-memory matrix block ("+rlen+"x"+clen+") exceeds supported size of "+Integer.MAX_VALUE+" elements (16GB). " +
-					                   "Please, report this issue and reduce the JVM heapsize to execute this operation in "+execType+".");
+			throw new RuntimeException("Dense in-memory matrix block ("+rlen+"x"+clen+") "
+				+ "exceeds supported size of "+Integer.MAX_VALUE+" elements (16GB). "
+				+ "Please, report this issue and reduce the JVM heapsize to execute "
+				+ "this operation in "+execType+".");
 		}
 		
 		//allocate block if non-existing or too small (guaranteed to be 0-initialized),
 		if(denseBlock == null || denseBlock.length < limit) {
-			long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
 			denseBlock = new double[(int)limit];
-			Statistics.allocateDoubleArrTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
 		}
 		
 		//clear nnz if necessary
@@ -1008,11 +996,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	 * @param opcode  extended opcode
 	 * @throws DMLRuntimeException if DMLRuntimeException occurs
 	 */
-	@SuppressWarnings("unused")
 	public void examSparsity(String opcode) 
 		throws DMLRuntimeException
 	{
-		long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
 		//determine target representation
 		boolean sparseDst = evalSparseFormatInMemory(); 
 				
@@ -1026,8 +1012,6 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			sparseToDense(opcode);
 		else if( !sparse && sparseDst )
 			denseToSparse(opcode);
-		
-		Statistics.examSparsityTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
 	}
 	
 	/**
@@ -1187,25 +1171,20 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	 * of the entire matrix block.
 	 * 
 	 */
-	@SuppressWarnings("unused")
-	public void recomputeNonZeros()
-	{
-		if( sparse && sparseBlock!=null ) //SPARSE (max long)
-		{
+	public long recomputeNonZeros() {
+		if( sparse && sparseBlock!=null ) { //SPARSE (max long)
 			//note: rlen might be <= sparseBlock.numRows()
 			nonZeros = sparseBlock.size(0, sparseBlock.numRows());
 		}
-		else if( !sparse && denseBlock!=null ) //DENSE (max int)
-		{
-			long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
+		else if( !sparse && denseBlock!=null ) { //DENSE (max int)
 			double[] a = denseBlock;
 			final int limit=rlen*clen;
 			int nnz = 0;
 			for(int i=0; i<limit; i++)
 				nnz += (a[i]!=0) ? 1 : 0;
 			nonZeros = nnz;
-			Statistics.recomputeNNZTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
 		}
+		return nonZeros;
 	}
 	
 	/**
@@ -2552,10 +2531,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 				nz2 = nz2 * m;
 		
 			//compute output sparsity consistent w/ the hop compiler
-			OpOp2 bop = op.getBinaryOperatorOpOp2();
 			double sp1 = OptimizerUtils.getSparsity(m, n, nz1);
 			double sp2 = OptimizerUtils.getSparsity(m, n, nz2);
-			double spout = OptimizerUtils.getBinaryOpSparsity(sp1, sp2, bop, true);
+			double spout = OptimizerUtils.getBinaryOpSparsity(
+				sp1, sp2, op.getBinaryOperatorOpOp2(), true);
 			estnnz = UtilFunctions.toLong(spout * m * n);
 		}
 		
@@ -5808,11 +5787,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	////////
 	// Misc methods
 	
-	private static MatrixBlock checkType(MatrixValue block) 
-		throws RuntimeException
-	{
+	private static MatrixBlock checkType(MatrixValue block) {
 		if( block!=null && !(block instanceof MatrixBlock))
-			throw new RuntimeException("Unsupported matrix value: "+block.getClass().getSimpleName());
+			throw new RuntimeException("Unsupported matrix value: "
+				+ block.getClass().getSimpleName());
 		return (MatrixBlock) block;
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
index fbd456f..701a367 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
@@ -50,6 +50,9 @@ public class CellwiseTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME12 = TEST_NAME+12; //((X/3) %% 0.6) + ((X/3) %/% 0.6)
 	private static final String TEST_NAME13 = TEST_NAME+13; //min(X + 7 * Y) large
 	private static final String TEST_NAME14 = TEST_NAME+14; //-2 * X + t(Y); t(Y) is rowvector
+	private static final String TEST_NAME15 = TEST_NAME+15; //colMins(2*log(X))
+	private static final String TEST_NAME16 = TEST_NAME+16; //colSums(2*log(X)); 
+	
 	
 	private static final String TEST_DIR = "functions/codegen/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + CellwiseTmplTest.class.getSimpleName() + "/";
@@ -62,7 +65,7 @@ public class CellwiseTmplTest extends AutomatedTestBase
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for( int i=1; i<=14; i++ ) {
+		for( int i=1; i<=16; i++ ) {
 			addTestConfiguration( TEST_NAME+i, new TestConfiguration(
 					TEST_CLASS_DIR, TEST_NAME+i, new String[] {String.valueOf(i)}) );
 		}
@@ -255,6 +258,37 @@ public class CellwiseTmplTest extends AutomatedTestBase
 		testCodegenIntegration( TEST_NAME14, true, ExecType.SPARK );
 	}
 	
+	@Test
+	public void testCodegenCellwiseRewrite15() {
+		testCodegenIntegration( TEST_NAME15, true, ExecType.CP );
+	}
+	
+	@Test
+	public void testCodegenCellwise15() {
+		testCodegenIntegration( TEST_NAME15, false, ExecType.CP );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite15_sp() {
+		testCodegenIntegration( TEST_NAME15, true, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite16() {
+		testCodegenIntegration( TEST_NAME16, true, ExecType.CP );
+	}
+	
+	@Test
+	public void testCodegenCellwise16() {
+		testCodegenIntegration( TEST_NAME16, false, ExecType.CP );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite16_sp() {
+		testCodegenIntegration( TEST_NAME16, true, ExecType.SPARK );
+	}
+	
+	
 	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
 	{			
 		boolean oldRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
@@ -281,7 +315,7 @@ public class CellwiseTmplTest extends AutomatedTestBase
 			
 			String HOME = SCRIPT_DIR + TEST_DIR;
 			fullDMLScriptName = HOME + testname + ".dml";
-			programArgs = new String[]{"-explain", "runtime", "-stats", "-args", output("S") };
+			programArgs = new String[]{"-explain", "hops", "-stats", "-args", output("S") };
 			
 			fullRScriptName = HOME + testname + ".R";
 			rCmd = getRCmd(inputDir(), expectedDir());			
@@ -313,7 +347,11 @@ public class CellwiseTmplTest extends AutomatedTestBase
 			else if( testname.equals(TEST_NAME10) ) //ensure min/max is fused
 				Assert.assertTrue(!heavyHittersContainsSubString("uamin","uamax"));
 			else if( testname.equals(TEST_NAME11) ) //ensure replace is fused
-				Assert.assertTrue(!heavyHittersContainsSubString("replace"));	
+				Assert.assertTrue(!heavyHittersContainsSubString("replace"));
+			else if( testname.equals(TEST_NAME15) )
+				Assert.assertTrue(!heavyHittersContainsSubString("uacmin"));
+			else if( testname.equals(TEST_NAME16) )
+				Assert.assertTrue(!heavyHittersContainsSubString("uack+"));
 		}
 		finally {
 			rtplatform = platformOld;

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index 2092f22..6d25130 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -49,7 +49,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME11 = TEST_NAME+"11"; //y - X %*% v
 	private static final String TEST_NAME12 = TEST_NAME+"12"; //Y=(X>=v); R=Y/rowSums(Y)
 	private static final String TEST_NAME13 = TEST_NAME+"13"; //rowSums(X)+rowSums(Y)
-	private static final String TEST_NAME14 = TEST_NAME+"14"; //colSums(max(floor(round(abs(min(sign(X+Y),1)))),7))
+	private static final String TEST_NAME14 = TEST_NAME+"14"; //colSums(max(floor(round(abs(min(sign(X+Y),rowSums(X))))),7))
 	private static final String TEST_NAME15 = TEST_NAME+"15"; //systemml nn - softmax backward
 	private static final String TEST_NAME16 = TEST_NAME+"16"; //Y=X-rowIndexMax(X); R=Y/rowSums(Y)
 	private static final String TEST_NAME17 = TEST_NAME+"17"; //MLogreg - vector-matrix w/ indexing

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/cellwisetmpl15.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl15.R b/src/test/scripts/functions/codegen/cellwisetmpl15.R
new file mode 100644
index 0000000..ac7da9a
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl15.R
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(7, 1100*200+6), 1100, 200, byrow=TRUE);
+
+R = t(colMins(2*log(X)));
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/cellwisetmpl15.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl15.dml b/src/test/scripts/functions/codegen/cellwisetmpl15.dml
new file mode 100644
index 0000000..e543671
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl15.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(seq(7, 1100*200+6), 1100, 200);
+if(1==1){}
+
+R = colMins(2*log(X));
+
+write(R, $1)

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/cellwisetmpl16.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl16.R b/src/test/scripts/functions/codegen/cellwisetmpl16.R
new file mode 100644
index 0000000..d8fa3b9
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl16.R
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+X = matrix(seq(7, 1100*200+6), 1100, 200, byrow=TRUE);
+
+R = t(colSums(2*log(X)));
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/cellwisetmpl16.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl16.dml b/src/test/scripts/functions/codegen/cellwisetmpl16.dml
new file mode 100644
index 0000000..1fb07b5
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl16.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(seq(7, 1100*200+6), 1100, 200);
+if(1==1){}
+
+R = colSums(2*log(X));
+
+write(R, $1)

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/rowAggPattern14.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern14.R b/src/test/scripts/functions/codegen/rowAggPattern14.R
index 34589e1..60f7714 100644
--- a/src/test/scripts/functions/codegen/rowAggPattern14.R
+++ b/src/test/scripts/functions/codegen/rowAggPattern14.R
@@ -28,7 +28,7 @@ library("matrixStats")
 X = matrix(seq(1,1500), 150, 10, byrow=TRUE);
 y = seq(1,150);
 
-Z = pmax(floor(round(abs(pmin(sign(X+y),1)))),7);
+Z = pmax(floor(round(abs(pmin(sign(X+y),rowSums(X)%*%matrix(1,1,10))))),7);
 R = t(colSums(Z));
 
 writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep="")); 

http://git-wip-us.apache.org/repos/asf/systemml/blob/45367829/src/test/scripts/functions/codegen/rowAggPattern14.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern14.dml b/src/test/scripts/functions/codegen/rowAggPattern14.dml
index f13c1ff..b47df7e 100644
--- a/src/test/scripts/functions/codegen/rowAggPattern14.dml
+++ b/src/test/scripts/functions/codegen/rowAggPattern14.dml
@@ -22,7 +22,7 @@
 X = matrix(seq(1,1500), rows=150, cols=10);
 y = seq(1,150);
 
-Z = max(floor(round(abs(min(sign(X+y),1)))),7)
+Z = max(floor(round(abs(min(sign(X+y),rowSums(X))))),7)
 R = colSums(Z); 
 
 write(R, $1)


[4/5] systemml git commit: [SYSTEMML-1800] Utils for reading matrix/frame blocks from streams

Posted by mb...@apache.org.
[SYSTEMML-1800] Utils for reading matrix/frame blocks from streams

In JMLC deployments, models and meta data are often read from resource
streams of packaged artifacts. This patch adds some util functions for
deserializing matrix and frame blocks directly from such input streams
to avoid the expensive read of text formats.

Furthermore, this patch also cleans up various Javadoc issues that were
recently introduced.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0fee3f66
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0fee3f66
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0fee3f66

Branch: refs/heads/master
Commit: 0fee3f66615621b8bfeed5f1e453a875b6b5731b
Parents: 4536782
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Jul 21 22:12:00 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 22 13:53:17 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/instructions/Instruction.java |  10 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  19 +--
 .../sysml/runtime/util/LocalFileUtils.java      | 146 ++++++++++++++-----
 3 files changed, 110 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/0fee3f66/src/main/java/org/apache/sysml/runtime/instructions/Instruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/Instruction.java b/src/main/java/org/apache/sysml/runtime/instructions/Instruction.java
index ad8cb92..6db8c7f 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/Instruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/Instruction.java
@@ -71,15 +71,7 @@ public abstract class Instruction
 		return type;
 	}
 	
-	/**
-	 * Setter for instruction line/column number 
-	 * 
-	 * @param beginLine beginning line position
-	 * @param endLine ending line position
-	 * @param beginCol beginning column position
-	 * @param endCol ending column position
-	 */
-	public void setLocation ( String filename, int beginLine, int endLine,  int beginCol, int endCol) {
+	public void setLocation(String filename, int beginLine, int endLine, int beginCol, int endCol) {
 		this.filename = filename;
 		this.beginLine = beginLine;
 		this.endLine = endLine;

http://git-wip-us.apache.org/repos/asf/systemml/blob/0fee3f66/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index d5306cb..f3c9f7b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1170,6 +1170,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	 * Recomputes and materializes the number of non-zero values
 	 * of the entire matrix block.
 	 * 
+	 * @return number of non-zeros
 	 */
 	public long recomputeNonZeros() {
 		if( sparse && sparseBlock!=null ) { //SPARSE (max long)
@@ -3637,24 +3638,6 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		return leftIndexingOperations(rhsMatrix, rl, ru, cl, cu, ret, update, null);
 	}
 	
-	/**
-	 * Method to perform leftIndexing operation for a given lower and upper bounds in row and column dimensions.
-	 * Updated matrix is returned as the output.
-	 * 
-	 * Operations to be performed: 
-	 *   1) result=this; 
-	 *   2) result[rowLower:rowUpper, colLower:colUpper] = rhsMatrix;
-	 * 
-	 * @param rhsMatrix matrix
-	 * @param rl row lower
-	 * @param ru row upper
-	 * @param cl column lower
-	 * @param cu column upper
-	 * @param ret ?
-	 * @param update ?
-	 * @return matrix block
-	 * @throws DMLRuntimeException if DMLRuntimeException occurs
-	 */
 	public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, 
 			int cl, int cu, MatrixBlock ret, UpdateType update, String opcode) 
 		throws DMLRuntimeException 

http://git-wip-us.apache.org/repos/asf/systemml/blob/0fee3f66/src/main/java/org/apache/sysml/runtime/util/LocalFileUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/LocalFileUtils.java b/src/main/java/org/apache/sysml/runtime/util/LocalFileUtils.java
index 0d74088..deda468 100644
--- a/src/main/java/org/apache/sysml/runtime/util/LocalFileUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/util/LocalFileUtils.java
@@ -72,52 +72,62 @@ public class LocalFileUtils
 	
 	/** Reads a matrix block from local file system.
 	 * 
-	 * @param filePathAndName file to read
+	 * @param fname file name to read
 	 * @return matrix block
 	 * @throws IOException if IOException occurs
 	 */
-	public static MatrixBlock readMatrixBlockFromLocal(String filePathAndName) throws IOException {
-		return (MatrixBlock) readWritableFromLocal(filePathAndName, new MatrixBlock());
+	public static MatrixBlock readMatrixBlockFromLocal(String fname) throws IOException {
+		return (MatrixBlock) readWritableFromLocal(fname, new MatrixBlock());
 	}
 	
 	/** Reads a matrix block from local file system.
 	 * 
-	 * @param filePathAndName file to read
+	 * @param fname file name to read
 	 * @param reuse matrix block to reuse
 	 * @return matrix block
 	 * @throws IOException if IOException occurs
 	 */
-	public static MatrixBlock readMatrixBlockFromLocal(String filePathAndName, MatrixBlock reuse) throws IOException {
-		return (MatrixBlock) readWritableFromLocal(filePathAndName, reuse);
+	public static MatrixBlock readMatrixBlockFromLocal(String fname, MatrixBlock reuse) throws IOException {
+		return (MatrixBlock) readWritableFromLocal(fname, reuse);
 	}
 
+	/** Reads a frame block from local file system.
+	 * 
+	 * @param fname file name to read
+	 * @return frame block
+	 * @throws IOException if IOException occurs
+	 */
+	public static FrameBlock readFrameBlockFromLocal(String fname) throws IOException {
+		return (FrameBlock) readWritableFromLocal(fname, new FrameBlock());
+	}
+	
 	/** Reads a matrix/frame block from local file system.
 	 * 
-	 * @param filePathAndName file to read
+	 * @param fname file name to read
 	 * @param matrix if true, read matrix. if false, read frame.
 	 * @return cache block (common interface to MatrixBlock and FrameBlock)
 	 * @throws IOException if IOException occurs
 	 */
-	public static CacheBlock readCacheBlockFromLocal(String filePathAndName, boolean matrix) throws IOException {
-		return (CacheBlock) readWritableFromLocal(filePathAndName, matrix?new MatrixBlock():new FrameBlock());
+	public static CacheBlock readCacheBlockFromLocal(String fname, boolean matrix) throws IOException {
+		return (CacheBlock) readWritableFromLocal(fname, matrix?new MatrixBlock():new FrameBlock());
 	}
 	
 	/**
 	 * Reads an arbitrary writable from local file system, using a fused buffered reader
 	 * with special support for matrix blocks.
 	 * 
-	 * @param filePathAndName file to read
+	 * @param fname file name to read
 	 * @param ret hadoop writable
 	 * @return hadoop writable
 	 * @throws IOException if IOException occurs
 	 */
-	public static Writable readWritableFromLocal(String filePathAndName, Writable ret)
+	public static Writable readWritableFromLocal(String fname, Writable ret)
 		throws IOException
 	{
-		FileInputStream fis = new FileInputStream( filePathAndName );
-		DataInput in  = !(ret instanceof MatrixBlock) ? 
-				new DataInputStream(new BufferedInputStream(fis, BUFFER_SIZE)) :
-				new FastBufferedDataInputStream(fis, BUFFER_SIZE);		
+		FileInputStream fis = new FileInputStream(fname);
+		DataInput in = !(ret instanceof MatrixBlock) ? 
+			new DataInputStream(new BufferedInputStream(fis, BUFFER_SIZE)) :
+			new FastBufferedDataInputStream(fis, BUFFER_SIZE);		
 		try {
 			ret.readFields(in);
 		}
@@ -129,38 +139,98 @@ public class LocalFileUtils
 		return ret;
 	}
 	
+	/**
+	 * Reads a matrix block from an input stream, using a fused buffered reader
+	 * with special support for matrix blocks.
+	 * 
+	 * @param is input stream to read
+	 * @return matrix block
+	 * @throws IOException if IOException occurs
+	 */
+	public static MatrixBlock readMatrixBlockFromStream(InputStream is) throws IOException {
+		return (MatrixBlock) readWritableFromStream(is, new MatrixBlock());
+	}
+	
+	/**
+	 * Reads a frame block from an input stream, using a fused buffered reader
+	 * with special support for matrix blocks.
+	 * 
+	 * @param is input stream to read
+	 * @return frame block
+	 * @throws IOException if IOException occurs
+	 */
+	public static FrameBlock readFrameBlockFromStream(InputStream is) throws IOException {
+		return (FrameBlock) readWritableFromStream(is, new FrameBlock());
+	}
+	
+	/**
+	 * Reads an arbitrary writable from an input stream, using a fused buffered reader
+	 * with special support for matrix blocks.
+	 * 
+	 * @param is input stream to read
+	 * @param ret hadoop writable
+	 * @return hadoop writable
+	 * @throws IOException if IOException occurs
+	 */
+	public static Writable readWritableFromStream(InputStream is, Writable ret)
+		throws IOException
+	{
+		DataInput in = !(ret instanceof MatrixBlock) ? 
+			new DataInputStream(new BufferedInputStream(is, BUFFER_SIZE)) :
+			new FastBufferedDataInputStream(is, BUFFER_SIZE);		
+		try {
+			ret.readFields(in);
+		}
+		finally {
+			IOUtilFunctions.closeSilently((InputStream)in);
+			IOUtilFunctions.closeSilently(is);
+		}
+		
+		return ret;
+	}
+	
 	/** Writes a matrix block to local file system.
 	 * 
-	 * @param filePathAndName file to write
+	 * @param fname file name to write
 	 * @param mb matrix block
 	 * @throws IOException if IOException occurs
 	 */
-	public static void writeMatrixBlockToLocal(String filePathAndName, MatrixBlock mb) throws IOException {
-		writeWritableToLocal(filePathAndName, mb);
+	public static void writeMatrixBlockToLocal(String fname, MatrixBlock mb) throws IOException {
+		writeWritableToLocal(fname, mb);
+	}
+	
+	/** Writes a frame block to local file system.
+	 * 
+	 * @param fname file name to write
+	 * @param fb frame block
+	 * @throws IOException if IOException occurs
+	 */
+	public static void writeFrameBlockToLocal(String fname, FrameBlock fb) throws IOException {
+		writeWritableToLocal(fname, fb);
 	}
 
 	/** Writes a matrix/frame block to local file system.
 	 * 
-	 * @param filePathAndName file to write
+	 * @param fname file name to write
 	 * @param cb cache block (common interface to matrix block and frame block)
 	 * @throws IOException if IOException occurs
 	 */
-	public static void writeCacheBlockToLocal(String filePathAndName, CacheBlock cb) throws IOException {
-		writeWritableToLocal(filePathAndName, cb);
+	public static void writeCacheBlockToLocal(String fname, CacheBlock cb) throws IOException {
+		writeWritableToLocal(fname, cb);
 	}
 	
 	/**
 	 * Writes an arbitrary writable to local file system, using a fused buffered writer
 	 * with special support for matrix blocks.
 	 * 
-	 * @param filePathAndName file to write
+	 * @param fname file name to write
 	 * @param mb Hadoop writable
 	 * @throws IOException if IOException occurs
 	 */
-	public static void writeWritableToLocal(String filePathAndName, Writable mb)
+	public static void writeWritableToLocal(String fname, Writable mb)
 		throws IOException
 	{	
-		FileOutputStream fos = new FileOutputStream( filePathAndName );
+		FileOutputStream fos = new FileOutputStream( fname );
 		FastBufferedDataOutputStream out = new FastBufferedDataOutputStream(fos, BUFFER_SIZE);
 		
 		try {
@@ -172,13 +242,13 @@ public class LocalFileUtils
 		}	
 	}
 
-	public static void writeByteArrayToLocal( String filePathAndName, byte[] data )
+	public static void writeByteArrayToLocal( String fname, byte[] data )
 		throws IOException
 	{	
 		//byte array write via java.nio file channel ~10-15% faster than java.io
 		FileChannel channel = null;
 		try {
-			Path path = Paths.get(filePathAndName);
+			Path path = Paths.get(fname);
 			channel = FileChannel.open(path, StandardOpenOption.CREATE, 
 				StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
 			channel.write(ByteBuffer.wrap(data));
@@ -188,15 +258,15 @@ public class LocalFileUtils
 		}
 	}
 
-	public static int readBlockSequenceFromLocal( String filePathAndName, Pair<MatrixIndexes,MatrixValue>[] outValues, HashMap<MatrixIndexes, Integer> outMap) 
+	public static int readBlockSequenceFromLocal(String fname, 
+			Pair<MatrixIndexes,MatrixValue>[] outValues, HashMap<MatrixIndexes, Integer> outMap) 
 		throws IOException
 	{
-		FileInputStream fis = new FileInputStream( filePathAndName );
-		FastBufferedDataInputStream in = new FastBufferedDataInputStream( fis, BUFFER_SIZE );
+		FileInputStream fis = new FileInputStream(fname);
+		FastBufferedDataInputStream in = new FastBufferedDataInputStream(fis, BUFFER_SIZE);
 		int bufferSize = 0;
 		
-		try
-		{
+		try {
 			int len = in.readInt();
 			for( int i=0; i<len; i++ ) {
 				outValues[i].getKey().readFields(in);
@@ -214,13 +284,13 @@ public class LocalFileUtils
 		return bufferSize;
 	}
 
-	public static void writeBlockSequenceToLocal( String filePathAndName, Pair<MatrixIndexes,MatrixValue>[] inValues, int len ) 
+	public static void writeBlockSequenceToLocal(String fname, Pair<MatrixIndexes,MatrixValue>[] inValues, int len) 
 		throws IOException
 	{
 		if( len > inValues.length )
 			throw new IOException("Invalid length of block sequence: len="+len+" vs data="+inValues.length);
 		
-		FileOutputStream fos = new FileOutputStream( filePathAndName );
+		FileOutputStream fos = new FileOutputStream(fname);
 		FastBufferedDataOutputStream out = new FastBufferedDataOutputStream(fos, BUFFER_SIZE);
 		
 		try 
@@ -281,7 +351,7 @@ public class LocalFileUtils
 		return ret;
 	}
 
-	public static void setLocalFilePermissions( File file, String permissions )
+	public static void setLocalFilePermissions(File file, String permissions)
 	{
 		//note: user and group treated the same way
 		char[] c = permissions.toCharArray();
@@ -310,7 +380,7 @@ public class LocalFileUtils
 		return createWorkingDirectoryWithUUID( DMLScript.getUUID() );
 	}
 
-	public static String createWorkingDirectoryWithUUID( String uuid )
+	public static String createWorkingDirectoryWithUUID(String uuid)
 		throws DMLRuntimeException 
 	{
 		//create local tmp dir if not existing
@@ -407,7 +477,7 @@ public class LocalFileUtils
 		return count;
 	}
 
-	public static String getWorkingDir( String category ) 
+	public static String getWorkingDir(String category) 
 		throws DMLRuntimeException
 	{
 		if( _workingDir == null )
@@ -422,7 +492,7 @@ public class LocalFileUtils
 		return sb.toString();
 	}
 
-	public static String getUniqueWorkingDir( String category ) 
+	public static String getUniqueWorkingDir(String category) 
 		throws DMLRuntimeException
 	{
 		if( _workingDir == null )
@@ -446,7 +516,7 @@ public class LocalFileUtils
 	 * @param text content of text file 
 	 * @throws IOException
 	 */
-	public static void writeTextFile( File file, String text ) 
+	public static void writeTextFile(File file, String text) 
 		throws IOException 
 	{
 		Writer writer = null;


[5/5] systemml git commit: [SYSTEMML-1506] Support codegen through all APIs, incl jmlc/mlcontext

Posted by mb...@apache.org.
[SYSTEMML-1506] Support codegen through all APIs, incl jmlc/mlcontext

This patch modifies the codegen compiler integration to make it
applicable through all APIs (command line, MLContext, JMLC, debug)
without modifying the individual replicated compilation chains.
Furthermore, this patch also cleans up the DML program data structure
and the generation of runtime programs.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/856230c5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/856230c5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/856230c5

Branch: refs/heads/master
Commit: 856230c56535742d824aab681fceea1567c26567
Parents: 0fee3f6
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Jul 22 13:40:45 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 22 13:53:18 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/api/DMLScript.java    |  25 +-
 .../apache/sysml/api/ScriptExecutorUtils.java   |   5 +-
 .../org/apache/sysml/api/jmlc/Connection.java   |   6 +-
 .../sysml/api/mlcontext/ScriptExecutor.java     |   6 +-
 .../org/apache/sysml/conf/CompilerConfig.java   |   6 +-
 .../apache/sysml/conf/ConfigurationManager.java |   5 +
 .../apache/sysml/hops/recompile/Recompiler.java |   9 +-
 .../org/apache/sysml/parser/DMLProgram.java     | 597 +------------------
 .../org/apache/sysml/parser/DMLTranslator.java  | 406 ++++++++++++-
 .../parfor/opt/ProgramRecompiler.java           |   3 +-
 .../java/org/apache/sysml/utils/Statistics.java |   3 +-
 .../functions/codegen/APICodegenTest.java       | 115 ++++
 .../functions/codegen/ZPackageSuite.java        |   1 +
 13 files changed, 549 insertions(+), 638 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index 2674af4..f428aa2 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -65,9 +65,6 @@ import org.apache.sysml.debug.DMLDebuggerProgramInfo;
 import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel;
-import org.apache.sysml.hops.codegen.SpoofCompiler;
-import org.apache.sysml.hops.codegen.SpoofCompiler.IntegrationType;
-import org.apache.sysml.hops.codegen.SpoofCompiler.PlanCachePolicy;
 import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopsException;
@@ -690,16 +687,6 @@ public class DMLScript
 	
 		//Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
 		dmlt.rewriteHopsDAG(prog);
-
-		//Step 5.1: Generate code for the rewritten Hop dags 
-		if( dmlconf.getBooleanValue(DMLConfig.CODEGEN) ){
-			SpoofCompiler.PLAN_CACHE_POLICY = PlanCachePolicy.get(
-					dmlconf.getBooleanValue(DMLConfig.CODEGEN_PLANCACHE),
-					dmlconf.getIntValue(DMLConfig.CODEGEN_LITERALS)==2);
-			SpoofCompiler.setExecTypeSpecificJavaCompiler();
-			if( SpoofCompiler.INTEGRATION==IntegrationType.HOPS )
-				dmlt.codgenHopsDAG(prog);
-		}
 		
 		//Step 6: construct lops (incl exec type and op selection)
 		dmlt.constructLops(prog);
@@ -710,14 +697,8 @@ public class DMLScript
 			dmlt.resetLopsDAGVisitStatus(prog);
 		}
 		
-		//Step 7: generate runtime program
-		Program rtprog = prog.getRuntimeProgram(dmlconf);
-
-		//Step 7.1: Generate code for the rewritten Hop dags w/o modify
-		if( dmlconf.getBooleanValue(DMLConfig.CODEGEN) 
-			&& SpoofCompiler.INTEGRATION==IntegrationType.RUNTIME ){
-			dmlt.codgenHopsDAG(rtprog);
-		}
+		//Step 7: generate runtime program, incl codegen
+		Program rtprog = dmlt.getRuntimeProgram(prog, dmlconf);
 		
 		//Step 8: [optional global data flow optimization]
 		if(OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY) ) 
@@ -807,7 +788,7 @@ public class DMLScript
 		dmlt.constructLops(prog);
 	
 		//Step 6: generate runtime program
-		dbprog.rtprog = prog.getRuntimeProgram(conf);
+		dbprog.rtprog = dmlt.getRuntimeProgram(prog, conf);
 		
 		try {
 			//set execution environment

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index b094c91..ebbcc21 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -22,6 +22,7 @@ package org.apache.sysml.api;
 import java.util.List;
 
 import org.apache.sysml.api.mlcontext.ScriptExecutor;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.codegen.SpoofCompiler;
 import org.apache.sysml.runtime.DMLRuntimeException;
@@ -98,9 +99,9 @@ public class ScriptExecutorUtils {
 				ec.getGPUContexts().forEach(gCtx -> gCtx.clearTemporaryMemory());
 				GPUContextPool.freeAllGPUContexts();
 			}
-			if (dmlconf.getBooleanValue(DMLConfig.CODEGEN))
+			if( ConfigurationManager.isCodegenEnabled() )
 				SpoofCompiler.cleanupCodeGenerator();
-
+			
 			// display statistics (incl caching stats if enabled)
 			Statistics.stopRunTimer();
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/api/jmlc/Connection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/Connection.java b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
index be440c8..1993ed4 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
@@ -42,6 +42,7 @@ import org.apache.sysml.conf.CompilerConfig;
 import org.apache.sysml.conf.CompilerConfig.ConfigType;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
+import org.apache.sysml.hops.codegen.SpoofCompiler;
 import org.apache.sysml.hops.rewrite.ProgramRewriter;
 import org.apache.sysml.hops.rewrite.RewriteRemovePersistentReadWrite;
 import org.apache.sysml.parser.DMLProgram;
@@ -122,6 +123,7 @@ public class Connection implements Closeable
 		cconf.set(ConfigType.ALLOW_DYN_RECOMPILATION, false);
 		cconf.set(ConfigType.ALLOW_INDIVIDUAL_SB_SPECIFIC_OPS, false);
 		cconf.set(ConfigType.ALLOW_CSE_PERSISTENT_READS, false);
+		cconf.set(ConfigType.CODEGEN_ENABLED, false);
 		ConfigurationManager.setLocalConfig(cconf);
 		
 		//disable caching globally 
@@ -216,7 +218,7 @@ public class Connection implements Closeable
 			
 			//lop construct and runtime prog generation
 			dmlt.constructLops(prog);
-			rtprog = prog.getRuntimeProgram(_dmlconf);
+			rtprog = dmlt.getRuntimeProgram(prog, _dmlconf);
 			
 			//final cleanup runtime prog
 			JMLCUtils.cleanupRuntimeProgram(rtprog, outputs);
@@ -247,6 +249,8 @@ public class Connection implements Closeable
 		ConfigurationManager.clearLocalConfigs();
 		if( ConfigurationManager.isDynamicRecompilation() )
 			JMLCProxy.setActive(null);
+		if( ConfigurationManager.isCodegenEnabled() )
+			SpoofCompiler.cleanupCodeGenerator();
 	}
 	
 	/**

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
index 6d19166..1a5d0bb 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
@@ -169,7 +169,7 @@ public class ScriptExecutor {
 	protected void rewriteHops() {
 		try {
 			dmlTranslator.rewriteHopsDAG(dmlProgram);
-		} catch (LanguageException | HopsException | ParseException e) {
+		} catch (LanguageException | HopsException | ParseException | DMLRuntimeException e) {
 			throw new MLContextException("Exception occurred while rewriting HOPS (high-level operators)", e);
 		}
 	}
@@ -210,8 +210,8 @@ public class ScriptExecutor {
 	 */
 	protected void generateRuntimeProgram() {
 		try {
-			runtimeProgram = dmlProgram.getRuntimeProgram(config);
-		} catch (LanguageException | DMLRuntimeException | LopsException | IOException e) {
+			runtimeProgram = dmlTranslator.getRuntimeProgram(dmlProgram, config);
+		} catch (LanguageException | DMLRuntimeException | LopsException | IOException | HopsException e) {
 			throw new MLContextException("Exception occurred while generating runtime program", e);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/conf/CompilerConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/CompilerConfig.java b/src/main/java/org/apache/sysml/conf/CompilerConfig.java
index fa473f0..4fc7765 100644
--- a/src/main/java/org/apache/sysml/conf/CompilerConfig.java
+++ b/src/main/java/org/apache/sysml/conf/CompilerConfig.java
@@ -71,7 +71,10 @@ public class CompilerConfig
 		//data on mlcontext (local) /jmlc (global); ignore unknowns on jmlc
 		IGNORE_READ_WRITE_METADATA, // global skip meta data reads
 		REJECT_READ_WRITE_UNKNOWNS, // ignore missing meta data	
-		MLCONTEXT // execution via new MLContext
+		MLCONTEXT, // execution via new MLContext
+		
+		//code generation enabled 
+		CODEGEN_ENABLED;
 	}
 	
 	//default flags (exposed for testing purposes only)
@@ -98,6 +101,7 @@ public class CompilerConfig
 		_bmap.put(ConfigType.IGNORE_READ_WRITE_METADATA, false);
 		_bmap.put(ConfigType.REJECT_READ_WRITE_UNKNOWNS, true);
 		_bmap.put(ConfigType.MLCONTEXT, false);
+		_bmap.put(ConfigType.CODEGEN_ENABLED, false);
 		
 		_imap = new HashMap<CompilerConfig.ConfigType, Integer>();
 		_imap.put(ConfigType.BLOCK_SIZE, OptimizerUtils.DEFAULT_BLOCKSIZE);

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/conf/ConfigurationManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/ConfigurationManager.java b/src/main/java/org/apache/sysml/conf/ConfigurationManager.java
index 7a8c70b..903466a 100644
--- a/src/main/java/org/apache/sysml/conf/ConfigurationManager.java
+++ b/src/main/java/org/apache/sysml/conf/ConfigurationManager.java
@@ -178,6 +178,11 @@ public class ConfigurationManager
 		return getCompilerConfigFlag(ConfigType.PARALLEL_LOCAL_OR_REMOTE_PARFOR);
 	}
 	
+	public static boolean isCodegenEnabled() {
+		return getDMLConfig().getBooleanValue(DMLConfig.CODEGEN)
+			|| getCompilerConfigFlag(ConfigType.CODEGEN_ENABLED);
+	}
+	
 	
 	///////////////////////////////////////
 	// Thread-local classes

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
index 04d521b7..091164d 100644
--- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
@@ -34,7 +34,6 @@ import org.apache.wink.json4j.JSONObject;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.jmlc.JMLCProxy;
 import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.conf.CompilerConfig.ConfigType;
 import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.DataOp;
@@ -222,8 +221,8 @@ public class Recompiler
 			memo.extract(hops, status);
 			
 			// codegen if enabled
-			if( ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.CODEGEN) 
-					&& SpoofCompiler.RECOMPILE_CODEGEN ) {
+			if( ConfigurationManager.isCodegenEnabled()
+				&& SpoofCompiler.RECOMPILE_CODEGEN ) {
 				Hop.resetVisitStatus(hops);
 				hops = SpoofCompiler.optimize(hops, 
 					(status==null || !status.isInitialCodegen()));
@@ -337,8 +336,8 @@ public class Recompiler
 			hops.refreshMemEstimates(memo); 		
 			
 			// codegen if enabled
-			if( ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.CODEGEN) 
-					&& SpoofCompiler.RECOMPILE_CODEGEN ) {
+			if( ConfigurationManager.isCodegenEnabled()
+				&& SpoofCompiler.RECOMPILE_CODEGEN ) {
 				hops.resetVisitStatus();
 				hops = SpoofCompiler.optimize(hops,
 					(status==null || !status.isInitialCodegen()));

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/parser/DMLProgram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLProgram.java b/src/main/java/org/apache/sysml/parser/DMLProgram.java
index 15d46c7..82ca7a6 100644
--- a/src/main/java/org/apache/sysml/parser/DMLProgram.java
+++ b/src/main/java/org/apache/sysml/parser/DMLProgram.java
@@ -19,39 +19,17 @@
 
 package org.apache.sysml.parser;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.DMLConfig;
-import org.apache.sysml.lops.LopProperties;
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.LopsException;
-import org.apache.sysml.lops.compile.Dag;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock;
-import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
-import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
-import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
-import org.apache.sysml.runtime.controlprogram.IfProgramBlock;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
 import org.apache.sysml.runtime.controlprogram.Program;
-import org.apache.sysml.runtime.controlprogram.ProgramBlock;
-import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
-import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
-import org.apache.sysml.runtime.instructions.CPInstructionParser;
-import org.apache.sysml.runtime.instructions.Instruction;
-import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction;
 
 
 public class DMLProgram 
 {
-	
 	private ArrayList<StatementBlock> _blocks;
 	private HashMap<String, FunctionStatementBlock> _functionBlocks;
 	private HashMap<String,DMLProgram> _namespaces;
@@ -183,582 +161,11 @@ public class DMLProgram
 		return sb.toString();
 	}
 	
-	
-	public Program getRuntimeProgram(DMLConfig config) throws IOException, LanguageException, DMLRuntimeException, LopsException {
-		
-		// constructor resets the set of registered functions
-		Program rtprog = new Program();
-		
-		// for all namespaces, translate function statement blocks into function program blocks
-		for (String namespace : _namespaces.keySet()){
-		
-			for (String fname : getFunctionStatementBlocks(namespace).keySet()){
-				// add program block to program
-				FunctionStatementBlock fsb = getFunctionStatementBlocks(namespace).get(fname);
-				FunctionProgramBlock rtpb = (FunctionProgramBlock)createRuntimeProgramBlock(rtprog, fsb, config);
-				rtprog.addFunctionProgramBlock(namespace, fname, rtpb);
-				rtpb.setRecompileOnce( fsb.isRecompileOnce() );
-			}
-		}
-		
-		// for each top-level block
-		for (StatementBlock sb : _blocks) {
-		
-			// add program block to program
-			ProgramBlock rtpb = createRuntimeProgramBlock(rtprog, sb, config);
-			rtprog.addProgramBlock(rtpb);
-		}
-		
-		
-		return rtprog ;
-	}
-	
-	public ProgramBlock createRuntimeProgramBlock(Program prog, StatementBlock sb, DMLConfig config) 
-		throws IOException, LopsException, DMLRuntimeException 
-	{
-		Dag<Lop> dag = null; 
-		Dag<Lop> pred_dag = null;
-
-		ArrayList<Instruction> instruct;
-		ArrayList<Instruction> pred_instruct = null;
-		
-		ProgramBlock retPB = null;
-		
-		// process While Statement - add runtime program blocks to program
-		if (sb instanceof WhileStatementBlock){
-		
-			// create DAG for loop predicates
-			pred_dag = new Dag<Lop>();
-			((WhileStatementBlock) sb).get_predicateLops().addToDag(pred_dag);
-			
-			// create instructions for loop predicates
-			pred_instruct = new ArrayList<Instruction>();
-			ArrayList<Instruction> pInst = pred_dag.getJobs(null, config);
-			for (Instruction i : pInst ) {
-				pred_instruct.add(i);
-			}
-			
-			// create while program block
-			WhileProgramBlock rtpb = new WhileProgramBlock(prog, pred_instruct);
-			
-			if (rtpb.getPredicateResultVar() == null) {
-				// e.g case : WHILE(continue)
-				if ( ((WhileStatementBlock) sb).get_predicateLops().getExecLocation() == LopProperties.ExecLocation.Data ) {
-					String resultVar = ((WhileStatementBlock) sb).get_predicateLops().getOutputParameters().getLabel();
-					rtpb.setPredicateResultVar( resultVar );
-				}
-				else {
-					LOG.error(sb.printBlockErrorLocation() + "Error in translating the WHILE predicate."); 
-					throw new LopsException(sb.printBlockErrorLocation() + "Error in translating the WHILE predicate."); 
-			
-				}
-			}			
-			//// process the body of the while statement block ////
-			
-			WhileStatementBlock wsb = (WhileStatementBlock)sb;
-			if (wsb.getNumStatements() > 1){
-				LOG.error(wsb.printBlockErrorLocation() + "WhileStatementBlock should only have 1 statement");
-				throw new LopsException(wsb.printBlockErrorLocation() + "WhileStatementBlock should only have 1 statement");
-			}
-			WhileStatement wstmt = (WhileStatement)wsb.getStatement(0);
-			for (StatementBlock sblock : wstmt.getBody()){
-				
-				// process the body
-				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
-				rtpb.addProgramBlock(childBlock);
-			}
-			
-			// check there are actually Lops in to process (loop stmt body will not have any)
-			if (wsb.getLops() != null && !wsb.getLops().isEmpty() ){
-				LOG.error(wsb.printBlockErrorLocation() + "WhileStatementBlock should have no Lops");
-				throw new LopsException(wsb.printBlockErrorLocation() + "WhileStatementBlock should have no Lops");
-			}
-			
-			
-			retPB = rtpb;
-			
-			//post processing for generating missing instructions
-			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
-			
-			// add statement block
-			retPB.setStatementBlock(sb);
-			
-			// add location information
-			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
-		}
-		
-		// process If Statement - add runtime program blocks to program
-		else if (sb instanceof IfStatementBlock){
-		
-			// create DAG for loop predicates
-			pred_dag = new Dag<Lop>();
-			((IfStatementBlock) sb).get_predicateLops().addToDag(pred_dag);
-			
-			// create instructions for loop predicates
-			pred_instruct = new ArrayList<Instruction>();
-			ArrayList<Instruction> pInst = pred_dag.getJobs(null, config);
-			for (Instruction i : pInst ) {
-				pred_instruct.add(i);
-			}
-			
-			// create if program block
-			IfProgramBlock rtpb = new IfProgramBlock(prog, pred_instruct);
-			
-			if (rtpb.getPredicateResultVar() == null ) {
-				// e.g case : If(continue)
-				if ( ((IfStatementBlock) sb).get_predicateLops().getExecLocation() == LopProperties.ExecLocation.Data ) {
-					String resultVar = ((IfStatementBlock) sb).get_predicateLops().getOutputParameters().getLabel();
-					rtpb.setPredicateResultVar( resultVar );
-				}
-				else {
-					LOG.error(sb.printBlockErrorLocation() + "Error in translating the IF predicate."); 
-					throw new LopsException(sb.printBlockErrorLocation() + "Error in translating the IF predicate."); 
-				}
-			}
-			
-			// process the body of the if statement block
-			IfStatementBlock isb = (IfStatementBlock)sb;
-			if (isb.getNumStatements() > 1){
-				LOG.error(isb.printBlockErrorLocation() + "IfStatementBlock should have only 1 statement");
-				throw new LopsException(isb.printBlockErrorLocation() + "IfStatementBlock should have only 1 statement");
-			}
-			IfStatement istmt = (IfStatement)isb.getStatement(0);
-			
-			// process the if body
-			for (StatementBlock sblock : istmt.getIfBody()){
-				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
-				rtpb.addProgramBlockIfBody(childBlock);
-			}
-			
-			// process the else body
-			for (StatementBlock sblock : istmt.getElseBody()){
-				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
-				rtpb.addProgramBlockElseBody(childBlock); 
-			}
-			
-			// check there are actually Lops in to process (loop stmt body will not have any)
-			if (isb.getLops() != null && !isb.getLops().isEmpty() ){
-				LOG.error(isb.printBlockErrorLocation() + "IfStatementBlock should have no Lops");
-				throw new LopsException(isb.printBlockErrorLocation() + "IfStatementBlock should have no Lops");
-			}
-			
-			retPB = rtpb;
-			
-			//post processing for generating missing instructions
-			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
-			
-			// add statement block
-			retPB.setStatementBlock(sb);
-			
-			// add location information
-			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
-		}
-		
-		// process For Statement - add runtime program blocks to program
-		// NOTE: applies to ForStatementBlock and ParForStatementBlock
-		else if (sb instanceof ForStatementBlock) 
-		{ 
-			ForStatementBlock fsb = (ForStatementBlock) sb;
-			
-			// create DAGs for loop predicates 
-			Dag<Lop> fromDag = new Dag<Lop>();
-			Dag<Lop> toDag = new Dag<Lop>();
-			Dag<Lop> incrementDag = new Dag<Lop>();
-			if( fsb.getFromHops()!=null )
-				fsb.getFromLops().addToDag(fromDag);
-			if( fsb.getToHops()!=null )
-				fsb.getToLops().addToDag(toDag);		
-			if( fsb.getIncrementHops()!=null )
-				fsb.getIncrementLops().addToDag(incrementDag);		
-				
-			// create instructions for loop predicates			
-			ArrayList<Instruction> fromInstructions = fromDag.getJobs(null, config);
-			ArrayList<Instruction> toInstructions = toDag.getJobs(null, config);
-			ArrayList<Instruction> incrementInstructions = incrementDag.getJobs(null, config);		
-
-			// create for program block
-			String sbName = null;
-			ForProgramBlock rtpb = null;
-			IterablePredicate iterPred = fsb.getIterPredicate();
-			String [] iterPredData= IterablePredicate.createIterablePredicateVariables(iterPred.getIterVar().getName(),
-					                                                                   fsb.getFromLops(), fsb.getToLops(), fsb.getIncrementLops()); 
-			
-			if( sb instanceof ParForStatementBlock )
-			{
-				sbName = "ParForStatementBlock";
-				rtpb = new ParForProgramBlock(prog, iterPredData,iterPred.getParForParams());
-				ParForProgramBlock pfrtpb = (ParForProgramBlock)rtpb;
-				pfrtpb.setResultVariables( ((ParForStatementBlock)sb).getResultVariables() );
-				pfrtpb.setStatementBlock((ParForStatementBlock)sb); //used for optimization and creating unscoped variables
-			}
-			else //ForStatementBlock
-			{
-				sbName = "ForStatementBlock";
-				rtpb = new ForProgramBlock(prog, iterPredData);
-			}
-			 
-			rtpb.setFromInstructions(      fromInstructions      );
-			rtpb.setToInstructions(        toInstructions        );
-			rtpb.setIncrementInstructions( incrementInstructions );
-			
-			rtpb.setIterablePredicateVars( iterPredData );
-			
-			// process the body of the for statement block
-			if (fsb.getNumStatements() > 1){
-				LOG.error(fsb.printBlockErrorLocation() + " "  + sbName + " should have 1 statement" );
-				throw new LopsException(fsb.printBlockErrorLocation() + " "  + sbName + " should have 1 statement" );
-			}
-			ForStatement fs = (ForStatement)fsb.getStatement(0);
-			for (StatementBlock sblock : fs.getBody()){
-				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
-				rtpb.addProgramBlock(childBlock); 
-			}
-		
-			// check there are actually Lops in to process (loop stmt body will not have any)
-			if (fsb.getLops() != null && !fsb.getLops().isEmpty()){
-				LOG.error(fsb.printBlockErrorLocation() + sbName + " should have no Lops" );
-				throw new LopsException(fsb.printBlockErrorLocation() + sbName + " should have no Lops" );
-			}
-			
-			retPB = rtpb;
-			
-			//post processing for generating missing instructions
-			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
-			
-			// add statement block
-			retPB.setStatementBlock(sb);
-			
-			// add location information
-			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
-		}
-		
-		// process function statement block - add runtime program blocks to program
-		else if (sb instanceof FunctionStatementBlock){
-			
-			FunctionStatementBlock fsb = (FunctionStatementBlock)sb;
-			if (fsb.getNumStatements() > 1){
-				LOG.error(fsb.printBlockErrorLocation() + "FunctionStatementBlock should only have 1 statement");
-				throw new LopsException(fsb.printBlockErrorLocation() + "FunctionStatementBlock should only have 1 statement");
-			}
-			FunctionStatement fstmt = (FunctionStatement)fsb.getStatement(0);
-			FunctionProgramBlock rtpb = null;
-			
-			if (fstmt instanceof ExternalFunctionStatement) {
-				 // create external function program block
-				
-				String execType = ((ExternalFunctionStatement) fstmt)
-                				    .getOtherParams().get(ExternalFunctionStatement.EXEC_TYPE);
-				boolean isCP = (execType.equals(ExternalFunctionStatement.IN_MEMORY)) ? true : false;
-				
-				String scratchSpaceLoc = null;
-				try {
-					scratchSpaceLoc = config.getTextValue(DMLConfig.SCRATCH_SPACE);
-				} catch (Exception e){
-					LOG.error(fsb.printBlockErrorLocation() + "could not retrieve parameter " + DMLConfig.SCRATCH_SPACE + " from DMLConfig");
-				}				
-				StringBuilder buff = new StringBuilder();
-				buff.append(scratchSpaceLoc);
-				buff.append(Lop.FILE_SEPARATOR);
-				buff.append(Lop.PROCESS_PREFIX);
-				buff.append(DMLScript.getUUID());
-				buff.append(Lop.FILE_SEPARATOR);
-				buff.append(ProgramConverter.CP_ROOT_THREAD_ID);
-				buff.append(Lop.FILE_SEPARATOR);
-				buff.append("PackageSupport");
-				buff.append(Lop.FILE_SEPARATOR);
-				String basedir =  buff.toString();
-				
-				if( isCP )
-				{
-					
-					rtpb = new ExternalFunctionProgramBlockCP(prog, 
-									fstmt.getInputParams(), fstmt.getOutputParams(), 
-									((ExternalFunctionStatement) fstmt).getOtherParams(),
-									basedir );					
-				}
-				else
-				{
-					rtpb = new ExternalFunctionProgramBlock(prog, 
-									fstmt.getInputParams(), fstmt.getOutputParams(), 
-									((ExternalFunctionStatement) fstmt).getOtherParams(),
-									basedir);
-				}
-				
-				if (!fstmt.getBody().isEmpty()){
-					LOG.error(fstmt.printErrorLocation() + "ExternalFunctionStatementBlock should have no statement blocks in body");
-					throw new LopsException(fstmt.printErrorLocation() + "ExternalFunctionStatementBlock should have no statement blocks in body");
-				}
-			}
-			else 
-			{
-				// create function program block
-				rtpb = new FunctionProgramBlock(prog, fstmt.getInputParams(), fstmt.getOutputParams());
-				
-				// process the function statement body
-				for (StatementBlock sblock : fstmt.getBody()){	
-					// process the body
-					ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
-					rtpb.addProgramBlock(childBlock);
-				}
-			}
-			
-			// check there are actually Lops in to process (loop stmt body will not have any)
-			if (fsb.getLops() != null && !fsb.getLops().isEmpty()){
-				LOG.error(fsb.printBlockErrorLocation() + "FunctionStatementBlock should have no Lops");
-				throw new LopsException(fsb.printBlockErrorLocation() + "FunctionStatementBlock should have no Lops");
-			}
-			
-			retPB = rtpb;
-			
-			// add location information
-			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
-		}
-		else {
-	
-			// handle general case
-			ProgramBlock rtpb = new ProgramBlock(prog);
-		
-			// DAGs for Lops
-			dag = new Dag<Lop>();
-
-			// check there are actually Lops in to process (loop stmt body will not have any)
-			if (sb.getLops() != null && !sb.getLops().isEmpty()){
-			
-				for (Lop l : sb.getLops()) {
-					l.addToDag(dag);
-				}
-				
-				// Instructions for Lobs DAGs
-				instruct = dag.getJobs(sb, config);
-				rtpb.addInstructions(instruct);
-			}
-			
-			/*// TODO: check with Doug
-			// add instruction for a function call
-			if (sb.getFunctionCallInst() != null){
-				rtpb.addInstruction(sb.getFunctionCallInst());
-			}*/
-			
-			retPB = rtpb;
-			
-			//post processing for generating missing instructions
-			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
-			
-			// add statement block
-			retPB.setStatementBlock(sb);
-			
-			// add location information
-			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
-		}
-		
-		return retPB;
-	}	
-	
-	/**
-	 * Post processing of each created program block in order to adhere to livein/liveout
-	 * (currently needed for cleanup (especially for caching) of intermediate results if the last datasink 
-	 * is an external function because instructions of external functions are created outside hops/lops,
-	 * e.g., X=..., Y=fun(X) and X is not used afterwards )
-	 * 
-	 * NOTES: 
-	 * (1) Rule1: checking livein and liveout is sufficient because the last external function is in its own
-	 * programblock anyway.
-	 * (2) as we cannot efficiently distinguish if the problematic var is created by an external function
-	 * or some other instruction, we generate RMVAR instructions although for vars created by non-CP
-	 * external functions RMFILEVAR instructions are required. However, all remaining files in scratch_space
-	 * are cleaned after execution anyway.
-	 * (3) As an alternative to doing rule 2, we could also check for existing objects in createvar and function invocation
-	 * (or generic at program block level) and remove objects of previous iterations accordingly (but objects of last iteration
-	 * would still require separate cleanup).
-	 * 
-	 * TODO: MB: external function invocations should become hops/lops as well (see instruction gen in DMLTranslator), 
-	 * (currently not possible at Hops/Lops level due the requirement of multiple outputs for functions) 
-	 * TODO: MB: we should in general always leverage livein/liveout during hops/lops generation.
-	 * TODO: MB: verify and correct can be removed once everything is integrated in hops/lops generation
-	 * 
-	 * @param in
-	 * @param out
-	 * @param pb
-	 * @return
-	 * @throws DMLRuntimeException 
-	 */
-	@SuppressWarnings("unused")
-	private ProgramBlock verifyAndCorrectProgramBlock(VariableSet in, VariableSet out, VariableSet kill, ProgramBlock pb) 
-		throws DMLRuntimeException
-	{	
-		//RULE 1: if in IN and not in OUT, then there should be an rmvar or rmfilevar inst
-		//(currently required for specific cases of external functions)
-		for( String varName : in.getVariableNames() )
-			if( !out.containsVariable(varName) ) 
-			{
-				DataType dt = in.getVariable(varName).getDataType();
-				if( !(dt==DataType.MATRIX || dt==DataType.UNKNOWN) )
-					continue; //skip rm instructions for non-matrix objects
-				
-				boolean foundRMInst = rContainsRMInstruction(pb, varName);
-				
-				if( !foundRMInst )
-				{
-					//create RMVAR instruction and put it into the programblock
-					Instruction inst = createCleanupInstruction(varName);
-					
-					inst.setLocation(in.getVariable(varName));
-					
-					addCleanupInstruction(pb, inst);
-
-					LOG.trace("Adding instruction (r1) "+inst.toString());
-				}		
-			}
-
-		//RULE 2: if in KILL and not in IN and not in OUT, then there should be an rmvar or rmfilevar inst
-		//(currently required for specific cases of nested loops)
-		for( String varName : kill.getVariableNames() )
-			if( (!in.containsVariable(varName)) && (!out.containsVariable(varName)) ) 
-			{
-				DataType dt = kill.getVariable(varName).getDataType();
-				if( !(dt==DataType.MATRIX || dt==DataType.UNKNOWN) )
-					continue; //skip rm instructions for non-matrix objects
-				
-				boolean foundRMInst = rContainsRMInstruction(pb, varName);
-				
-				if( !foundRMInst )
-				{
-					//create RMVAR instruction and put it into the programblock
-					Instruction inst = createCleanupInstruction(varName);
-					
-					inst.setLocation(kill.getVariable(varName));
-					
-					//System.out.println("add rvar rule2 "+inst.toString());
-					addCleanupInstruction(pb, inst);
-					
-					LOG.trace("Adding instruction (r2) "+inst.toString());
-				}		
-			}
-		
-		return pb;
-	}
-	
-	private Instruction createCleanupInstruction(String varName) 
-		throws DMLRuntimeException
-	{
-		//(example "CP+Lops.OPERAND_DELIMITOR+rmvar+Lops.OPERAND_DELIMITOR+Var7")
-		StringBuilder sb = new StringBuilder();
-		sb.append("CP");
-		sb.append(Lop.OPERAND_DELIMITOR);
-		sb.append("rmvar");
-		sb.append(Lop.OPERAND_DELIMITOR);
-		sb.append(varName);
-		String str = sb.toString();
-		Instruction inst = CPInstructionParser.parseSingleInstruction( str );
-		
-		return inst;
-	}
-	
-	/**
-	 * Determines if the given program block includes a RMVAR or RMFILEVAR
-	 * instruction for the given varName.
-	 * 
-	 * @param pb program block
-	 * @param varName variable name
-	 * @return true if program block contains remove instruction for variable
-	 */
-	private boolean rContainsRMInstruction(ProgramBlock pb, String varName)
-	{	
-		if (pb instanceof WhileProgramBlock)
-		{
-			WhileProgramBlock tmp = (WhileProgramBlock)pb;	
-			for( ProgramBlock c : tmp.getChildBlocks() )
-				if( rContainsRMInstruction(c, varName) )
-					return true;
-		}
-		else if (pb instanceof IfProgramBlock)
-		{
-			IfProgramBlock tmp = (IfProgramBlock)pb;	
-			for( ProgramBlock c : tmp.getChildBlocksIfBody() )
-				if( rContainsRMInstruction(c, varName) )
-					return true;
-			for( ProgramBlock c : tmp.getChildBlocksElseBody() )
-				if( rContainsRMInstruction(c, varName) )
-					return true;
-		}
-		else if (pb instanceof ForProgramBlock) //includes ParFORProgramBlock
-		{ 
-			ForProgramBlock tmp = (ForProgramBlock)pb;	
-			for( ProgramBlock c : tmp.getChildBlocks() )
-				if( rContainsRMInstruction(c, varName) )
-					return true;
-		}		
-		else if (  pb instanceof FunctionProgramBlock ) //includes ExternalFunctionProgramBlock and ExternalFunctionProgramBlockCP)
-		{
-			//do nothing
-		}
-		else 
-		{
-			for( Instruction inst : pb.getInstructions() )
-				if( inst instanceof VariableCPInstruction 
-					&& ((VariableCPInstruction) inst).isRemoveVariable(varName) )
-					return true;
-		}
-		
-		return false;
-	}
-	
-	/**
-	 * Adds the generated cleanup RMVAR instruction to the given program block.
-	 * In case of generic (last-level) programblocks it is added to the end of 
-	 * the list of instructions, while for complex program blocks it is added to
-	 * the end of the list of exit instructions.
-	 * 
-	 * @param pb program block
-	 * @param inst instruction
-	 * @throws DMLRuntimeException if DMLRuntimeException occurs
-	 */
-	private void addCleanupInstruction( ProgramBlock pb, Instruction inst ) 
-		throws DMLRuntimeException
-	{
-		//System.out.println("Adding rm var instructions: "+inst.toString());
-		
-		if (pb instanceof WhileProgramBlock)
-		{
-			WhileProgramBlock wpb = (WhileProgramBlock)pb;
-			ArrayList<ProgramBlock> childs = wpb.getChildBlocks();
-			if( !childs.get(childs.size()-1).getInstructions().isEmpty() ) //generic last level pb
-				childs.get(childs.size()-1).addInstruction(inst);
-			else{
-				ProgramBlock pbNew = new ProgramBlock(pb.getProgram());
-				pbNew.addInstruction(inst);
-				childs.add(pbNew); 
-			}
-		}
-		else if (pb instanceof ForProgramBlock) //includes ParFORProgramBlock
-		{
-			ForProgramBlock wpb = (ForProgramBlock)pb;
-			ArrayList<ProgramBlock> childs = wpb.getChildBlocks();
-			if( !childs.get(childs.size()-1).getInstructions().isEmpty() ) //generic last level pb
-				childs.get(childs.size()-1).addInstruction(inst);
-			else{
-				ProgramBlock pbNew = new ProgramBlock(pb.getProgram());
-				pbNew.addInstruction(inst);
-				childs.add(pbNew); 
-			}
-		}
-		else if (pb instanceof IfProgramBlock)
-			((IfProgramBlock)pb).addExitInstruction(inst);
-		else if (   pb instanceof FunctionProgramBlock )  //includes ExternalFunctionProgramBlock and ExternalFunctionProgramBlockCP)
-			; //do nothing
-		else 
-		{
-			pb.addInstruction(inst); //add inst at end of pb	
-		}
-	}
-	
-	public static String constructFunctionKey(String fnamespace, String fname)
-	{
+	public static String constructFunctionKey(String fnamespace, String fname) {
 		return fnamespace + Program.KEY_DELIM + fname;
 	}
 	
-	public static String[] splitFunctionKey(String fkey)
-	{
+	public static String[] splitFunctionKey(String fkey) {
 		return fkey.split(Program.KEY_DELIM);
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 61f7217..3708bb6 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -27,7 +27,9 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
@@ -58,12 +60,16 @@ import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.TernaryOp;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.codegen.SpoofCompiler;
+import org.apache.sysml.hops.codegen.SpoofCompiler.IntegrationType;
+import org.apache.sysml.hops.codegen.SpoofCompiler.PlanCachePolicy;
 import org.apache.sysml.hops.ipa.InterProceduralAnalysis;
 import org.apache.sysml.hops.recompile.Recompiler;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.hops.rewrite.ProgramRewriter;
 import org.apache.sysml.lops.Lop;
+import org.apache.sysml.lops.LopProperties;
 import org.apache.sysml.lops.LopsException;
+import org.apache.sysml.lops.compile.Dag;
 import org.apache.sysml.parser.Expression.BuiltinFunctionOp;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.FormatType;
@@ -71,7 +77,17 @@ import org.apache.sysml.parser.Expression.ParameterizedBuiltinFunctionOp;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.parser.PrintStatement.PRINTTYPE;
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock;
+import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
+import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
+import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
+import org.apache.sysml.runtime.controlprogram.IfProgramBlock;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
 import org.apache.sysml.runtime.controlprogram.Program;
+import org.apache.sysml.runtime.controlprogram.ProgramBlock;
+import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
+import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
+import org.apache.sysml.runtime.instructions.Instruction;
 
 
 public class DMLTranslator 
@@ -256,7 +272,7 @@ public class DMLTranslator
 	}
 
 	public void rewriteHopsDAG(DMLProgram dmlp) 
-		throws ParseException, LanguageException, HopsException 
+		throws ParseException, LanguageException, HopsException, DMLRuntimeException 
 	{
 		//apply hop rewrites (static rewrites)
 		ProgramRewriter rewriter = new ProgramRewriter(true, false);
@@ -275,10 +291,21 @@ public class DMLTranslator
 		rewriter2.rewriteProgramHopDAGs(dmlp);
 		resetHopsDAGVisitStatus(dmlp);
 		
-		// Compute memory estimates for all the hops. These estimates are used
-		// subsequently in various optimizations, e.g. CP vs. MR scheduling and parfor.
+		//compute memory estimates for all the hops. These estimates are used
+		//subsequently in various optimizations, e.g. CP vs. MR scheduling and parfor.
 		refreshMemEstimates(dmlp);
 		resetHopsDAGVisitStatus(dmlp);
+		
+		//enhance HOP DAGs by automatic operator fusion
+		DMLConfig dmlconf = ConfigurationManager.getDMLConfig();
+		if( ConfigurationManager.isCodegenEnabled() ){
+			SpoofCompiler.PLAN_CACHE_POLICY = PlanCachePolicy.get(
+				dmlconf.getBooleanValue(DMLConfig.CODEGEN_PLANCACHE),
+				dmlconf.getIntValue(DMLConfig.CODEGEN_LITERALS)==2);
+			SpoofCompiler.setExecTypeSpecificJavaCompiler();
+			if( SpoofCompiler.INTEGRATION==IntegrationType.HOPS )
+				codgenHopsDAG(dmlp);
+		}
 	}
 	
 	public void codgenHopsDAG(DMLProgram dmlp)
@@ -418,6 +445,376 @@ public class DMLTranslator
 		
 	} // end method
 	
+	
+	public Program getRuntimeProgram(DMLProgram prog, DMLConfig config) 
+		throws IOException, LanguageException, DMLRuntimeException, LopsException, HopsException 
+	{	
+		// constructor resets the set of registered functions
+		Program rtprog = new Program();
+		
+		// for all namespaces, translate function statement blocks into function program blocks
+		for (String namespace : prog.getNamespaces().keySet()){
+		
+			for (String fname : prog.getFunctionStatementBlocks(namespace).keySet()){
+				// add program block to program
+				FunctionStatementBlock fsb = prog.getFunctionStatementBlocks(namespace).get(fname);
+				FunctionProgramBlock rtpb = (FunctionProgramBlock)createRuntimeProgramBlock(rtprog, fsb, config);
+				rtprog.addFunctionProgramBlock(namespace, fname, rtpb);
+				rtpb.setRecompileOnce( fsb.isRecompileOnce() );
+			}
+		}
+		
+		// translate all top-level statement blocks to program blocks
+		for (StatementBlock sb : prog.getStatementBlocks() ) {
+		
+			// add program block to program
+			ProgramBlock rtpb = createRuntimeProgramBlock(rtprog, sb, config);
+			rtprog.addProgramBlock(rtpb);
+		}
+		
+		//enhance runtime program by automatic operator fusion
+		if( ConfigurationManager.isCodegenEnabled() 
+			&& SpoofCompiler.INTEGRATION==IntegrationType.RUNTIME ){
+			codgenHopsDAG(rtprog);
+		}
+		
+		return rtprog ;
+	}
+	
+	public ProgramBlock createRuntimeProgramBlock(Program prog, StatementBlock sb, DMLConfig config) 
+		throws IOException, LopsException, DMLRuntimeException 
+	{
+		Dag<Lop> dag = null; 
+		Dag<Lop> pred_dag = null;
+
+		ArrayList<Instruction> instruct;
+		ArrayList<Instruction> pred_instruct = null;
+		
+		ProgramBlock retPB = null;
+		
+		// process While Statement - add runtime program blocks to program
+		if (sb instanceof WhileStatementBlock){
+		
+			// create DAG for loop predicates
+			pred_dag = new Dag<Lop>();
+			((WhileStatementBlock) sb).get_predicateLops().addToDag(pred_dag);
+			
+			// create instructions for loop predicates
+			pred_instruct = new ArrayList<Instruction>();
+			ArrayList<Instruction> pInst = pred_dag.getJobs(null, config);
+			for (Instruction i : pInst ) {
+				pred_instruct.add(i);
+			}
+			
+			// create while program block
+			WhileProgramBlock rtpb = new WhileProgramBlock(prog, pred_instruct);
+			
+			if (rtpb.getPredicateResultVar() == null) {
+				// e.g case : WHILE(continue)
+				if ( ((WhileStatementBlock) sb).get_predicateLops().getExecLocation() == LopProperties.ExecLocation.Data ) {
+					String resultVar = ((WhileStatementBlock) sb).get_predicateLops().getOutputParameters().getLabel();
+					rtpb.setPredicateResultVar( resultVar );
+				}
+				else {
+					LOG.error(sb.printBlockErrorLocation() + "Error in translating the WHILE predicate."); 
+					throw new LopsException(sb.printBlockErrorLocation() + "Error in translating the WHILE predicate."); 
+			
+				}
+			}			
+			//// process the body of the while statement block ////
+			
+			WhileStatementBlock wsb = (WhileStatementBlock)sb;
+			if (wsb.getNumStatements() > 1){
+				LOG.error(wsb.printBlockErrorLocation() + "WhileStatementBlock should only have 1 statement");
+				throw new LopsException(wsb.printBlockErrorLocation() + "WhileStatementBlock should only have 1 statement");
+			}
+			WhileStatement wstmt = (WhileStatement)wsb.getStatement(0);
+			for (StatementBlock sblock : wstmt.getBody()){
+				
+				// process the body
+				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
+				rtpb.addProgramBlock(childBlock);
+			}
+			
+			// sanity check: a while statement block itself must not carry any Lops
+			if (wsb.getLops() != null && !wsb.getLops().isEmpty() ){
+				LOG.error(wsb.printBlockErrorLocation() + "WhileStatementBlock should have no Lops");
+				throw new LopsException(wsb.printBlockErrorLocation() + "WhileStatementBlock should have no Lops");
+			}
+			
+			
+			retPB = rtpb;
+			
+			//post processing for generating missing instructions
+			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
+			
+			// add statement block
+			retPB.setStatementBlock(sb);
+			
+			// add location information
+			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
+		}
+		
+		// process If Statement - add runtime program blocks to program
+		else if (sb instanceof IfStatementBlock){
+		
+			// create DAG for the if predicate
+			pred_dag = new Dag<Lop>();
+			((IfStatementBlock) sb).get_predicateLops().addToDag(pred_dag);
+			
+			// create instructions for the if predicate
+			pred_instruct = new ArrayList<Instruction>();
+			ArrayList<Instruction> pInst = pred_dag.getJobs(null, config);
+			for (Instruction i : pInst ) {
+				pred_instruct.add(i);
+			}
+			
+			// create if program block
+			IfProgramBlock rtpb = new IfProgramBlock(prog, pred_instruct);
+			
+			if (rtpb.getPredicateResultVar() == null ) {
+				// e.g case : If(continue)
+				if ( ((IfStatementBlock) sb).get_predicateLops().getExecLocation() == LopProperties.ExecLocation.Data ) {
+					String resultVar = ((IfStatementBlock) sb).get_predicateLops().getOutputParameters().getLabel();
+					rtpb.setPredicateResultVar( resultVar );
+				}
+				else {
+					LOG.error(sb.printBlockErrorLocation() + "Error in translating the IF predicate."); 
+					throw new LopsException(sb.printBlockErrorLocation() + "Error in translating the IF predicate."); 
+				}
+			}
+			
+			// process the body of the if statement block
+			IfStatementBlock isb = (IfStatementBlock)sb;
+			if (isb.getNumStatements() > 1){
+				LOG.error(isb.printBlockErrorLocation() + "IfStatementBlock should have only 1 statement");
+				throw new LopsException(isb.printBlockErrorLocation() + "IfStatementBlock should have only 1 statement");
+			}
+			IfStatement istmt = (IfStatement)isb.getStatement(0);
+			
+			// process the if body
+			for (StatementBlock sblock : istmt.getIfBody()){
+				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
+				rtpb.addProgramBlockIfBody(childBlock);
+			}
+			
+			// process the else body
+			for (StatementBlock sblock : istmt.getElseBody()){
+				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
+				rtpb.addProgramBlockElseBody(childBlock); 
+			}
+			
+			// sanity check: an if statement block itself must not carry any Lops
+			if (isb.getLops() != null && !isb.getLops().isEmpty() ){
+				LOG.error(isb.printBlockErrorLocation() + "IfStatementBlock should have no Lops");
+				throw new LopsException(isb.printBlockErrorLocation() + "IfStatementBlock should have no Lops");
+			}
+			
+			retPB = rtpb;
+			
+			//post processing for generating missing instructions
+			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
+			
+			// add statement block
+			retPB.setStatementBlock(sb);
+			
+			// add location information
+			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
+		}
+		
+		// process For Statement - add runtime program blocks to program
+		// NOTE: applies to ForStatementBlock and ParForStatementBlock
+		else if (sb instanceof ForStatementBlock) 
+		{ 
+			ForStatementBlock fsb = (ForStatementBlock) sb;
+			
+			// create DAGs for loop predicates 
+			Dag<Lop> fromDag = new Dag<Lop>();
+			Dag<Lop> toDag = new Dag<Lop>();
+			Dag<Lop> incrementDag = new Dag<Lop>();
+			if( fsb.getFromHops()!=null )
+				fsb.getFromLops().addToDag(fromDag);
+			if( fsb.getToHops()!=null )
+				fsb.getToLops().addToDag(toDag);		
+			if( fsb.getIncrementHops()!=null )
+				fsb.getIncrementLops().addToDag(incrementDag);		
+				
+			// create instructions for loop predicates			
+			ArrayList<Instruction> fromInstructions = fromDag.getJobs(null, config);
+			ArrayList<Instruction> toInstructions = toDag.getJobs(null, config);
+			ArrayList<Instruction> incrementInstructions = incrementDag.getJobs(null, config);		
+
+			// create for program block
+			String sbName = null;
+			ForProgramBlock rtpb = null;
+			IterablePredicate iterPred = fsb.getIterPredicate();
+			String [] iterPredData= IterablePredicate.createIterablePredicateVariables(iterPred.getIterVar().getName(),
+					                                                                   fsb.getFromLops(), fsb.getToLops(), fsb.getIncrementLops()); 
+			
+			if( sb instanceof ParForStatementBlock )
+			{
+				sbName = "ParForStatementBlock";
+				rtpb = new ParForProgramBlock(prog, iterPredData,iterPred.getParForParams());
+				ParForProgramBlock pfrtpb = (ParForProgramBlock)rtpb;
+				pfrtpb.setResultVariables( ((ParForStatementBlock)sb).getResultVariables() );
+				pfrtpb.setStatementBlock((ParForStatementBlock)sb); //used for optimization and creating unscoped variables
+			}
+			else //ForStatementBlock
+			{
+				sbName = "ForStatementBlock";
+				rtpb = new ForProgramBlock(prog, iterPredData);
+			}
+			 
+			rtpb.setFromInstructions(      fromInstructions      );
+			rtpb.setToInstructions(        toInstructions        );
+			rtpb.setIncrementInstructions( incrementInstructions );
+			
+			rtpb.setIterablePredicateVars( iterPredData );
+			
+			// process the body of the for statement block
+			if (fsb.getNumStatements() > 1){
+				LOG.error(fsb.printBlockErrorLocation() + " "  + sbName + " should have 1 statement" );
+				throw new LopsException(fsb.printBlockErrorLocation() + " "  + sbName + " should have 1 statement" );
+			}
+			ForStatement fs = (ForStatement)fsb.getStatement(0);
+			for (StatementBlock sblock : fs.getBody()){
+				ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
+				rtpb.addProgramBlock(childBlock); 
+			}
+		
+			// sanity check: a for/parfor statement block itself must not carry any Lops
+			if (fsb.getLops() != null && !fsb.getLops().isEmpty()){
+				LOG.error(fsb.printBlockErrorLocation() + sbName + " should have no Lops" );
+				throw new LopsException(fsb.printBlockErrorLocation() + sbName + " should have no Lops" );
+			}
+			
+			retPB = rtpb;
+			
+			//post processing for generating missing instructions
+			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
+			
+			// add statement block
+			retPB.setStatementBlock(sb);
+			
+			// add location information
+			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
+		}
+		
+		// process function statement block - add runtime program blocks to program
+		else if (sb instanceof FunctionStatementBlock){
+			
+			FunctionStatementBlock fsb = (FunctionStatementBlock)sb;
+			if (fsb.getNumStatements() > 1){
+				LOG.error(fsb.printBlockErrorLocation() + "FunctionStatementBlock should only have 1 statement");
+				throw new LopsException(fsb.printBlockErrorLocation() + "FunctionStatementBlock should only have 1 statement");
+			}
+			FunctionStatement fstmt = (FunctionStatement)fsb.getStatement(0);
+			FunctionProgramBlock rtpb = null;
+			
+			if (fstmt instanceof ExternalFunctionStatement) {
+				 // create external function program block
+				
+				String execType = ((ExternalFunctionStatement) fstmt)
+                				    .getOtherParams().get(ExternalFunctionStatement.EXEC_TYPE);
+				boolean isCP = (execType.equals(ExternalFunctionStatement.IN_MEMORY)) ? true : false;
+				
+				String scratchSpaceLoc = null;
+				try {
+					scratchSpaceLoc = config.getTextValue(DMLConfig.SCRATCH_SPACE);
+				} catch (Exception e){
+					LOG.error(fsb.printBlockErrorLocation() + "could not retrieve parameter " + DMLConfig.SCRATCH_SPACE + " from DMLConfig");
+				}				
+				StringBuilder buff = new StringBuilder();
+				buff.append(scratchSpaceLoc);
+				buff.append(Lop.FILE_SEPARATOR);
+				buff.append(Lop.PROCESS_PREFIX);
+				buff.append(DMLScript.getUUID());
+				buff.append(Lop.FILE_SEPARATOR);
+				buff.append(ProgramConverter.CP_ROOT_THREAD_ID);
+				buff.append(Lop.FILE_SEPARATOR);
+				buff.append("PackageSupport");
+				buff.append(Lop.FILE_SEPARATOR);
+				String basedir =  buff.toString();
+				
+				if( isCP )
+				{
+					
+					rtpb = new ExternalFunctionProgramBlockCP(prog, 
+									fstmt.getInputParams(), fstmt.getOutputParams(), 
+									((ExternalFunctionStatement) fstmt).getOtherParams(),
+									basedir );					
+				}
+				else
+				{
+					rtpb = new ExternalFunctionProgramBlock(prog, 
+									fstmt.getInputParams(), fstmt.getOutputParams(), 
+									((ExternalFunctionStatement) fstmt).getOtherParams(),
+									basedir);
+				}
+				
+				if (!fstmt.getBody().isEmpty()){
+					LOG.error(fstmt.printErrorLocation() + "ExternalFunctionStatementBlock should have no statement blocks in body");
+					throw new LopsException(fstmt.printErrorLocation() + "ExternalFunctionStatementBlock should have no statement blocks in body");
+				}
+			}
+			else 
+			{
+				// create function program block
+				rtpb = new FunctionProgramBlock(prog, fstmt.getInputParams(), fstmt.getOutputParams());
+				
+				// process the function statement body
+				for (StatementBlock sblock : fstmt.getBody()){	
+					// process the body
+					ProgramBlock childBlock = createRuntimeProgramBlock(prog, sblock, config);
+					rtpb.addProgramBlock(childBlock);
+				}
+			}
+			
+			// sanity check: a function statement block itself must not carry any Lops
+			if (fsb.getLops() != null && !fsb.getLops().isEmpty()){
+				LOG.error(fsb.printBlockErrorLocation() + "FunctionStatementBlock should have no Lops");
+				throw new LopsException(fsb.printBlockErrorLocation() + "FunctionStatementBlock should have no Lops");
+			}
+			
+			retPB = rtpb;
+			
+			// add location information
+			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
+		}
+		else {
+	
+			// handle general case
+			ProgramBlock rtpb = new ProgramBlock(prog);
+		
+			// DAGs for Lops
+			dag = new Dag<Lop>();
+
+			// check whether there are actually Lops to process (loop stmt body will not have any)
+			if (sb.getLops() != null && !sb.getLops().isEmpty()){
+			
+				for (Lop l : sb.getLops()) {
+					l.addToDag(dag);
+				}
+				
+				// Instructions for Lops DAGs
+				instruct = dag.getJobs(sb, config);
+				rtpb.addInstructions(instruct);
+			}
+			
+			retPB = rtpb;
+			
+			//post processing for generating missing instructions
+			//retPB = verifyAndCorrectProgramBlock(sb.liveIn(), sb.liveOut(), sb._kill, retPB);
+			
+			// add statement block
+			retPB.setStatementBlock(sb);
+			
+			// add location information
+			retPB.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
+		}
+		
+		return retPB;
+	}
 		
 	public void printLops(DMLProgram dmlp) throws ParseException, LanguageException, HopsException, LopsException {
 		if (LOG.isDebugEnabled()){
@@ -1533,8 +1930,7 @@ public class DMLTranslator
 			throw new ParseException(target.printErrorLocation() + " must define matrix " + target.getName() + " before indexing operations are allowed ");
 		}
 		
-		//TODO Doug, please verify this (we need probably a cleaner way than this postprocessing)
-		if( sourceOp.getDataType() == DataType.MATRIX && source.getOutput().getDataType() == DataType.SCALAR )
+		if( sourceOp.getDataType().isMatrix() && source.getOutput().getDataType().isScalar() )
 			sourceOp.setDataType(DataType.SCALAR);
 		
 		Hop leftIndexOp = new LeftIndexingOp(target.getName(), target.getDataType(), ValueType.DOUBLE, 

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/ProgramRecompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/ProgramRecompiler.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/ProgramRecompiler.java
index 2d03af3..189d061 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/ProgramRecompiler.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/ProgramRecompiler.java
@@ -71,8 +71,7 @@ public class ProgramRecompiler
 		
 		//construct runtime program from lops
 		for( StatementBlock sb : sbs ) {
-			DMLProgram prog = sb.getDMLProg();
-			ret.add( prog.createRuntimeProgramBlock(rtprog, sb, config) );
+			ret.add(dmlt.createRuntimeProgramBlock(rtprog, sb, config));
 		}
 		
 		return ret;

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java
index a72b89e..847682c 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -33,7 +33,6 @@ import java.util.concurrent.atomic.LongAdder;
 
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
@@ -729,7 +728,7 @@ public class Statistics
 				sb.append("Functions recompiled:\t\t" + getFunRecompiles() + ".\n");
 				sb.append("Functions recompile time:\t" + String.format("%.3f", ((double)getFunRecompileTime())/1000000000) + " sec.\n");	
 			}
-			if( ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.CODEGEN) ) {
+			if( ConfigurationManager.isCodegenEnabled() ) {
 				sb.append("Codegen compile (DAG, CP, JC):\t" + getCodegenDAGCompile() + "/" + getCodegenCPlanCompile() + "/" + getCodegenClassCompile() + ".\n");
 				sb.append("Codegen compile times (DAG,JC):\t" + String.format("%.3f", (double)getCodegenCompileTime()/1000000000) + "/" + 
 						String.format("%.3f", (double)getCodegenClassCompileTime()/1000000000)  + " sec.\n");

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/test/java/org/apache/sysml/test/integration/functions/codegen/APICodegenTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/APICodegenTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/APICodegenTest.java
new file mode 100644
index 0000000..7e6ead1
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/APICodegenTest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import static org.apache.sysml.api.mlcontext.ScriptFactory.dml;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.jmlc.Connection;
+import org.apache.sysml.api.jmlc.PreparedScript;
+import org.apache.sysml.api.mlcontext.MLContext;
+import org.apache.sysml.api.mlcontext.Script;
+import org.apache.sysml.conf.CompilerConfig.ConfigType;
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.util.DataConverter;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.utils.Statistics;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class APICodegenTest extends AutomatedTestBase
+{
+	protected final static String TEST_DIR = "org/apache/sysml/api/mlcontext";
+	protected final static String TEST_NAME = "MLContext";
+
+	private final static int rows = 100;
+	private final static int cols = 1600;
+	private final static double sparsity = 0.7;
+
+	@Override
+	public void setUp() {
+		addTestConfiguration(TEST_DIR, TEST_NAME);
+		getAndLoadTestConfiguration(TEST_NAME);
+	}
+	
+	@Test
+	public void testCodegenMLContext() {
+		runMLContextParforDatasetTest(false);
+	}
+	
+	@Test
+	public void testCodegenJMLCTest() {
+		runMLContextParforDatasetTest(true);
+	}
+
+	private void runMLContextParforDatasetTest(boolean jmlc) 
+	{
+		try {
+			double[][] X = getRandomMatrix(rows, cols, -10, 10, sparsity, 76543); 
+			MatrixBlock mX = DataConverter.convertToMatrixBlock(X); 
+			
+			String s = "X = read(\"/tmp\");"
+				+ "R = colSums(X/rowSums(X));"
+				+ "write(R, \"tmp2\")";
+			
+			//execute scripts
+			if( jmlc ) {
+				DMLScript.STATISTICS = true;
+				Connection conn = new Connection(ConfigType.CODEGEN_ENABLED, 
+					ConfigType.ALLOW_DYN_RECOMPILATION);
+				PreparedScript pscript = conn.prepareScript(
+					s, new String[]{"X"}, new String[]{"R"}, false); 
+				pscript.setMatrix("X", mX, false);
+				pscript.executeScript();
+				conn.close();
+				System.out.println(Statistics.display());
+			}
+			else {
+				SparkConf conf = SparkExecutionContext.createSystemMLSparkConf()
+					.setAppName("MLContextTest").setMaster("local");
+				JavaSparkContext sc = new JavaSparkContext(conf);
+				MLContext ml = new MLContext(sc);
+				ml.setConfigProperty("codegen.enabled", "true");
+				ml.setStatistics(true);
+				Script script = dml(s).in("X", mX).out("R");
+				ml.execute(script);
+				ml.resetConfig();
+				sc.stop();
+				ml.close();
+			}
+			
+			//check for generated operator
+			Assert.assertTrue(heavyHittersContainsSubString("spoofRA"));
+		}
+		catch(Exception ex) {
+			throw new RuntimeException(ex);
+		}
+	}
+
+	@After
+	public void tearDown() {
+		super.tearDown();
+	}
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/856230c5/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
index a7d45bf..63be419 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
@@ -33,6 +33,7 @@ import org.junit.runners.Suite;
 	AlgorithmMLogreg.class,
 	AlgorithmMSVM.class,
 	AlgorithmPNMF.class,
+	APICodegenTest.class,
 	CellwiseTmplTest.class,
 	CompressedCellwiseTest.class,
 	CompressedMultiAggregateTest.class,


[2/5] systemml git commit: [SYSTEMML-1787] Column range indexing in codegen rowwise operators

Posted by mb...@apache.org.
[SYSTEMML-1787] Column range indexing in codegen rowwise operators

This patch extends the code generator's row-wise template (compiler and
runtime) with column range indexing, where for each row we look up a row
vector from a side input. This is useful for supporting expressions such
as the following (which are part of the inner loop of Mlogreg with
multiple classes):

Q = P[,1:K] * (X %*% ssX_V);
R = t(X) %*% (Q - P[,1:K] * rowSums(Q));

Fusing P[,1:K] into the surrounding row-wise template avoids an
additional intermediate of size nrow(X) x #classes.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d7e4c5a5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d7e4c5a5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d7e4c5a5

Branch: refs/heads/master
Commit: d7e4c5a56b392b218235f82e2fbbe57626db2710
Parents: c6679b7
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Jul 21 15:57:07 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 22 13:53:15 2017 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/hops/LeftIndexingOp.java   |  4 +-
 .../sysml/hops/codegen/SpoofCompiler.java       |  5 ++-
 .../sysml/hops/codegen/cplan/CNodeTernary.java  | 21 ++++++++--
 .../hops/codegen/template/TemplateRow.java      |  9 +++--
 .../hops/codegen/template/TemplateUtils.java    | 15 ++++++--
 .../sysml/hops/rewrite/HopRewriteUtils.java     | 40 ++++++++++----------
 .../rewrite/RewriteForLoopVectorization.java    |  8 ++--
 .../rewrite/RewriteIndexingVectorization.java   |  8 ++--
 .../runtime/codegen/LibSpoofPrimitives.java     |  4 +-
 .../sysml/runtime/codegen/SpoofOperator.java    | 13 +++++++
 .../functions/codegen/RowAggTmplTest.java       |  3 +-
 11 files changed, 83 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
index 601cb6f..a641622 100644
--- a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
+++ b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
@@ -82,11 +82,11 @@ public class LeftIndexingOp  extends Hop
 		HopsException.check(_input.size() == 6, this, "should have 6 inputs but has %d inputs", 6);
 	}
 
-	public boolean getRowLowerEqualsUpper(){
+	public boolean isRowLowerEqualsUpper(){
 		return _rowLowerEqualsUpper;
 	}
 	
-	public boolean getColLowerEqualsUpper() {
+	public boolean isColLowerEqualsUpper() {
 		return _colLowerEqualsUpper;
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 87ed1a0..4a59d1b 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -688,7 +688,7 @@ public class SpoofCompiler
 			cplans2.put(e.getKey(), new Pair<Hop[],CNodeTpl>(inHops, tpl));
 			
 			//remove invalid plans with column indexing on main input
-			if( tpl instanceof CNodeCell ) {
+			if( tpl instanceof CNodeCell || tpl instanceof CNodeRow ) {
 				CNodeData in1 = (CNodeData)tpl.getInput().get(0);
 				if( rHasLookupRC1(tpl.getOutput(), in1) || isLookupRC1(tpl.getOutput(), in1) ) {
 					cplans2.remove(e.getKey());
@@ -776,7 +776,8 @@ public class SpoofCompiler
 	}
 	
 	private static boolean isLookupRC1(CNode node, CNodeData mainInput) {
-		return (node instanceof CNodeTernary && ((CNodeTernary)node).getType()==TernaryType.LOOKUP_RC1 
+		return (node instanceof CNodeTernary && (((CNodeTernary)node).getType()==TernaryType.LOOKUP_RC1 
+				|| ((CNodeTernary)node).getType()==TernaryType.LOOKUP_RVECT1 )
 				&& node.getInput().get(0) instanceof CNodeData
 				&& ((CNodeData)node.getInput().get(0)).getHopID() == mainInput.getHopID());
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
index 9a4b103..e9bb472 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
@@ -28,7 +28,8 @@ public class CNodeTernary extends CNode
 	public enum TernaryType {
 		PLUS_MULT, MINUS_MULT,
 		REPLACE, REPLACE_NAN,
-		LOOKUP_RC1;
+		LOOKUP_RC1, LOOKUP_RVECT1;
+		
 		
 		public static boolean contains(String value) {
 			for( TernaryType tt : values()  )
@@ -53,14 +54,19 @@ public class CNodeTernary extends CNode
 					return "    double %TMP% = Double.isNaN(%IN1%) ? %IN3% : %IN1%;\n";
 					
 				case LOOKUP_RC1:
-					return sparse ?
-							"    double %TMP% = getValue(%IN1v%, %IN2%, rowIndex, %IN3%-1);\n" :	
-							"    double %TMP% = getValue(%IN1%, %IN2%, rowIndex, %IN3%-1);\n";	
+					return "    double %TMP% = getValue(%IN1%, %IN2%, rowIndex, %IN3%-1);\n";
+					
+				case LOOKUP_RVECT1:
+					return "    double[] %TMP% = getVector(%IN1%, %IN2%, rowIndex, %IN3%-1);\n";
 					
 				default: 
 					throw new RuntimeException("Invalid ternary type: "+this.toString());
 			}
 		}
+		
+		public boolean isVectorPrimitive() {
+			return (this == LOOKUP_RVECT1);
+		}
 	}
 	
 	private final TernaryType _type;
@@ -116,6 +122,8 @@ public class CNodeTernary extends CNode
 			case REPLACE: 
 			case REPLACE_NAN: return "t(rplc)";
 			case LOOKUP_RC1: return "u(ixrc1)";
+			case LOOKUP_RVECT1: return "u(ixrv1)";
+			
 			default:
 				return super.toString();	
 		}
@@ -133,6 +141,11 @@ public class CNodeTernary extends CNode
 				_cols = 0;
 				_dataType= DataType.SCALAR;
 				break;
+			case LOOKUP_RVECT1:
+				_rows = 1;
+				_cols = _inputs.get(0)._cols;
+				_dataType= DataType.MATRIX;
+				break;
 		}
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 8445aeb..659b528 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -88,7 +88,8 @@ public class TemplateRow extends TemplateBase
 				&& isFuseSkinnyMatrixMult(hop.getParent().get(0)))
 			|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()!=Direction.RowCol 
 				&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1
-				&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG));
+				&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG))
+			|| (hop instanceof IndexingOp && HopRewriteUtils.isColumnRangeIndexing((IndexingOp)hop));
 	}
 
 	@Override
@@ -398,9 +399,9 @@ public class TemplateRow extends TemplateBase
 		{
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
 			out = new CNodeTernary(cdata1, 
-					TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), 
-					TemplateUtils.createCNodeData(hop.getInput().get(4), true),
-					TernaryType.LOOKUP_RC1);
+				TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), 
+				TemplateUtils.createCNodeData(hop.getInput().get(4), true),
+				(!hop.dimsKnown()||hop.getDim2()>1) ? TernaryType.LOOKUP_RVECT1 : TernaryType.LOOKUP_RC1);
 		}
 		
 		if( out == null ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index 4bd5bf1..647c9d3 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -32,6 +32,7 @@ import org.apache.sysml.hops.TernaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp2;
+import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.codegen.cplan.CNode;
 import org.apache.sysml.hops.codegen.cplan.CNodeBinary;
@@ -174,9 +175,10 @@ public class TemplateUtils
 	public static RowType getRowType(Hop output, Hop... inputs) {
 		Hop X = inputs[0];
 		Hop B1 = (inputs.length>1) ? inputs[1] : null;
-		if( HopRewriteUtils.isEqualSize(output, X) )
+		if( X!=null && HopRewriteUtils.isEqualSize(output, X) )
 			return RowType.NO_AGG;
-		else if( B1 != null && output.getDim1()==X.getDim1() && output.getDim2()==B1.getDim2() )
+		else if( (B1 != null && output.getDim1()==X.getDim1() && output.getDim2()==B1.getDim2())
+			|| (output instanceof IndexingOp && HopRewriteUtils.isColumnRangeIndexing((IndexingOp)output)))
 			return RowType.NO_AGG_B1;
 		else if( output.getDim1()==X.getDim1() && (output.getDim2()==1 
 				|| HopRewriteUtils.isBinary(output, OpOp2.CBIND)) 
@@ -325,6 +327,8 @@ public class TemplateUtils
 		int max = 0;
 		for( CNode input : node.getInput() )
 			max = Math.max(max, getMaxVectorIntermediates(input));
+		max = Math.max(max, (node instanceof CNodeTernary
+			&& ((CNodeTernary)node).getType().isVectorPrimitive()) ? 1 : 0);
 		max = Math.max(max, (node instanceof CNodeBinary)? 
 			(((CNodeBinary)node).getType().isVectorVectorPrimitive() ? 3 :
 			((CNodeBinary)node).getType().isVectorScalarPrimitive() ? 2 :
@@ -345,10 +349,13 @@ public class TemplateUtils
 			ret += countVectorIntermediates(c);
 		//compute vector requirements of current node
 		int cntBin = (node instanceof CNodeBinary 
-			&& ((CNodeBinary)node).getType().isVectorPrimitive()) ? 1 : 0;
+			&& ((CNodeBinary)node).getType().isVectorPrimitive()
+			&& !((CNodeBinary)node).getType().name().endsWith("_ADD")) ? 1 : 0;
 		int cntUn = (node instanceof CNodeUnary
 				&& ((CNodeUnary)node).getType().isVectorScalarPrimitive()) ? 1 : 0;
-		return ret + cntBin + cntUn;
+		int cntTn = (node instanceof CNodeTernary
+				&& ((CNodeTernary)node).getType().isVectorPrimitive()) ? 1 : 0;
+		return ret + cntBin + cntUn + cntTn;
 	}
 
 	public static boolean isType(TemplateType type, TemplateType... validTypes) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 14bcc1e..bc6d22c 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -904,26 +904,26 @@ public class HopRewriteUtils
 		return true;
 	}
 	
-	public static boolean isFullColumnIndexing(LeftIndexingOp hop)
-	{
-		boolean colPred = hop.getColLowerEqualsUpper();  //single col
-		
-		Hop rl = hop.getInput().get(2);
-		Hop ru = hop.getInput().get(3);
-		
-		return colPred && rl instanceof LiteralOp && getDoubleValueSafe((LiteralOp)rl)==1
-				&& ru instanceof LiteralOp && getDoubleValueSafe((LiteralOp)ru)==hop.getDim1();
-	}
-	
-	public static boolean isFullRowIndexing(LeftIndexingOp hop)
-	{
-		boolean rowPred = hop.getRowLowerEqualsUpper();  //single row
-		
-		Hop cl = hop.getInput().get(4);
-		Hop cu = hop.getInput().get(5);
-		
-		return rowPred && cl instanceof LiteralOp && getDoubleValueSafe((LiteralOp)cl)==1
-				&& cu instanceof LiteralOp && getDoubleValueSafe((LiteralOp)cu)==hop.getDim2();
+	public static boolean isFullColumnIndexing(LeftIndexingOp hop) {
+		return hop.isColLowerEqualsUpper()
+			&& ((isLiteralOfValue(hop.getInput().get(2), 1)
+			&& isLiteralOfValue(hop.getInput().get(3), hop.getDim1()))
+			|| hop.getDim1() == hop.getInput().get(0).getDim1());
+	}
+	
+	public static boolean isFullRowIndexing(LeftIndexingOp hop) {
+		return hop.isRowLowerEqualsUpper()
+			&& ((isLiteralOfValue(hop.getInput().get(4), 1)
+			&& isLiteralOfValue(hop.getInput().get(5), hop.getDim2()))
+			|| hop.getDim2() == hop.getInput().get(0).getDim2());
+	}
+	
+	public static boolean isColumnRangeIndexing(IndexingOp hop) {
+		return ((isLiteralOfValue(hop.getInput().get(1), 1)
+			&& isLiteralOfValue(hop.getInput().get(2), hop.getDim1()))
+			|| hop.getDim1() == hop.getInput().get(0).getDim1())
+			&& isLiteralOfValue(hop.getInput().get(3), 1)
+			&& hop.getInput().get(4) instanceof LiteralOp;
 	}
 	
 	public static boolean isScalarMatrixBinaryMult( Hop hop ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
index e3e55fe..7154b36 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
@@ -236,7 +236,7 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 					IndexingOp rix1 = (IndexingOp) lixrhs.getInput().get(1);
 					
 					//check for rowwise
-					if(    lix.getRowLowerEqualsUpper() && rix0.isRowLowerEqualsUpper() && rix1.isRowLowerEqualsUpper() 
+					if(    lix.isRowLowerEqualsUpper() && rix0.isRowLowerEqualsUpper() && rix1.isRowLowerEqualsUpper() 
 						&& lix.getInput().get(2).getName().equals(itervar)
 						&& rix0.getInput().get(1).getName().equals(itervar)
 						&& rix1.getInput().get(1).getName().equals(itervar))
@@ -245,7 +245,7 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 						rowIx = true;
 					}
 					//check for colwise
-					if(    lix.getColLowerEqualsUpper() && rix0.isColLowerEqualsUpper() && rix1.isColLowerEqualsUpper() 
+					if(    lix.isColLowerEqualsUpper() && rix0.isColLowerEqualsUpper() && rix1.isColLowerEqualsUpper() 
 						&& lix.getInput().get(4).getName().equals(itervar)
 						&& rix0.getInput().get(3).getName().equals(itervar)
 						&& rix1.getInput().get(3).getName().equals(itervar))
@@ -406,14 +406,14 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 		boolean[] ret = new boolean[2]; //apply, rowIx
 		
 		//check for rowwise
-		if(    lix.getRowLowerEqualsUpper() && rix.isRowLowerEqualsUpper() 
+		if(    lix.isRowLowerEqualsUpper() && rix.isRowLowerEqualsUpper() 
 			&& lix.getInput().get(2).getName().equals(itervar)
 			&& rix.getInput().get(1).getName().equals(itervar) ) {
 			ret[0] = true;
 			ret[1] = true;
 		}
 		//check for colwise
-		if(    lix.getColLowerEqualsUpper() && rix.isColLowerEqualsUpper() 
+		if(    lix.isColLowerEqualsUpper() && rix.isColLowerEqualsUpper() 
 			&& lix.getInput().get(4).getName().equals(itervar)
 			&& rix.getInput().get(3).getName().equals(itervar) ) {
 			ret[0] = true;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
index b797e00..0724612 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
@@ -197,8 +197,8 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 		if( hop instanceof LeftIndexingOp ) //left indexing
 		{
 			LeftIndexingOp ihop0 = (LeftIndexingOp) hop;
-			boolean isSingleRow = ihop0.getRowLowerEqualsUpper();
-			boolean isSingleCol = ihop0.getColLowerEqualsUpper();
+			boolean isSingleRow = ihop0.isRowLowerEqualsUpper();
+			boolean isSingleCol = ihop0.isColLowerEqualsUpper();
 			boolean appliedRow = false;
 			
 			if( isSingleRow && isSingleCol )
@@ -210,7 +210,7 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 				while( current.getInput().get(0) instanceof LeftIndexingOp ) {
 					LeftIndexingOp tmp = (LeftIndexingOp) current.getInput().get(0);
 					if(    tmp.getParent().size()>1  //multiple consumers, i.e., not a simple chain
-						|| !((LeftIndexingOp) tmp).getRowLowerEqualsUpper() //row merge not applicable
+						|| !((LeftIndexingOp) tmp).isRowLowerEqualsUpper() //row merge not applicable
 						|| tmp.getInput().get(2) != ihop0.getInput().get(2) //not the same row
 						|| tmp.getInput().get(0).getDim2() <= 1 ) //target is single column or unknown 
 					{
@@ -289,7 +289,7 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 				while( current.getInput().get(0) instanceof LeftIndexingOp ) {
 					LeftIndexingOp tmp = (LeftIndexingOp) current.getInput().get(0);
 					if(    tmp.getParent().size()>1  //multiple consumers, i.e., not a simple chain
-						|| !((LeftIndexingOp) tmp).getColLowerEqualsUpper() //row merge not applicable
+						|| !((LeftIndexingOp) tmp).isColLowerEqualsUpper() //row merge not applicable
 						|| tmp.getInput().get(4) != ihop0.getInput().get(4)  //not the same col
 						|| tmp.getInput().get(0).getDim1() <= 1 )  //target is single row or unknown
 					{

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 1c59ee0..5b3b193 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -1451,11 +1451,11 @@ public class LibSpoofPrimitives
 		memPool.remove();
 	}
 	
-	private static double[] allocVector(int len, boolean reset) {
+	protected static double[] allocVector(int len, boolean reset) {
 		return allocVector(len, reset, 0);
 	}
 	
-	private static double[] allocVector(int len, boolean reset, double resetVal) {
+	protected static double[] allocVector(int len, boolean reset, double resetVal) {
 		LinkedList<double[]> list = memPool.get(); 
 		
 		//find and remove vector with matching len 

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
index 9561fcb..e302012 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
@@ -168,6 +168,19 @@ public abstract class SpoofOperator implements Serializable
 			(data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, colIndex) : 0;
 	}
 	
+	protected static double[] getVector(SideInput data, int n, double rowIndex, double colIndex) {
+		int irowIndex = UtilFunctions.toInt(rowIndex);
+		int icolIndex = UtilFunctions.toInt(colIndex);
+		return getVector(data, n, irowIndex, icolIndex);
+	}
+	
+	protected static double[] getVector(SideInput data, int n, int rowIndex, int colIndex) {
+		//note: wrapper sideinput guaranteed to be in dense format
+		double[] c = LibSpoofPrimitives.allocVector(colIndex+1, false);
+		System.arraycopy(data.ddat, rowIndex*n, c, 0, colIndex+1);
+		return c;
+	}
+	
 	public static class SideInput {
 		public final double[] ddat;
 		public final MatrixBlock mdat;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c5a5/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index 59c3ab4..2092f22 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -578,7 +578,8 @@ public class RowAggTmplTest extends AutomatedTestBase
 				Assert.assertTrue(!heavyHittersContainsSubString("spoofRA", 2)
 					&& !heavyHittersContainsSubString("sp_spoofRA", 2));
 			if( testname.equals(TEST_NAME30) )
-				Assert.assertTrue(!heavyHittersContainsSubString("spoofRA", 2));
+				Assert.assertTrue(!heavyHittersContainsSubString("spoofRA", 2)
+					&& !heavyHittersContainsSubString("rangeReIndex"));
 		}
 		finally {
 			rtplatform = platformOld;