You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/24 20:27:35 UTC

[3/6] incubator-systemml git commit: [SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

[SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

This patch removes redundancy from existing hop rewrites by
consolidating common primitives into HopRewriteUtils in order to avoid
subtle bugs such as missing size propagation, missing line numbers, and
missing block size configurations.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1fe1a02d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1fe1a02d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1fe1a02d

Branch: refs/heads/master
Commit: 1fe1a02d210356207d75cc3ffc2f246cd4a8d11b
Parents: 4316efe
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Feb 22 18:23:12 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 24 12:27:26 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/AggBinaryOp.java |  68 +--
 .../sysml/hops/ParameterizedBuiltinOp.java      |  45 +-
 .../java/org/apache/sysml/hops/ReorgOp.java     |   4 +-
 .../sysml/hops/globalopt/gdfgraph/GDFNode.java  |   5 +-
 .../sysml/hops/rewrite/HopRewriteUtils.java     | 179 +++---
 .../RewriteAlgebraicSimplificationDynamic.java  | 596 ++++++-------------
 .../RewriteAlgebraicSimplificationStatic.java   | 416 ++++---------
 .../hops/rewrite/RewriteConstantFolding.java    |  13 +-
 .../rewrite/RewriteForLoopVectorization.java    |  45 +-
 .../rewrite/RewriteIndexingVectorization.java   |  18 +-
 .../RewriteMatrixMultChainOptimization.java     |   4 +-
 .../rewrite/RewriteRemoveReadAfterWrite.java    |   7 +-
 .../rewrite/RewriteRemoveUnnecessaryCasts.java  |   7 +-
 .../RewriteSplitDagDataDependentOperators.java  |  19 +-
 .../rewrite/RewriteSplitDagUnknownCSVRead.java  |   4 +-
 .../org/apache/sysml/parser/DMLTranslator.java  |   4 +-
 .../parfor/opt/OptimizerRuleBased.java          |   5 +-
 17 files changed, 489 insertions(+), 950 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 73dd8a4..dd9182d 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -191,7 +191,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			else if( et == ExecType.SPARK ) 
 			{
 				//matrix mult operation selection part 3 (SPARK type)
-				boolean tmmRewrite = input1 instanceof ReorgOp && ((ReorgOp)input1).getOp()==ReOrgOp.TRANSPOSE;
+				boolean tmmRewrite = HopRewriteUtils.isTransposeOperation(input1);
 				_method = optFindMMultMethodSpark ( 
 						input1.getDim1(), input1.getDim2(), input1.getRowsInBlock(), input1.getColsInBlock(), input1.getNnz(),   
 						input2.getDim1(), input2.getDim2(), input2.getRowsInBlock(), input2.getColsInBlock(), input2.getNnz(),
@@ -459,7 +459,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 	{
 		int index = left ? 0 : 1;
 		return !(getInput().get(index) instanceof DataOp && ((DataOp)getInput().get(index)).requiresCheckpoint())  
-			&& !(getInput().get(index) instanceof ReorgOp && ((ReorgOp)getInput().get(index)).getOp()==ReOrgOp.TRANSPOSE)
+			&& !HopRewriteUtils.isTransposeOperation(getInput().get(index))
 			&& getInput().get(index).getParent().size()==1 //bagg is only parent	
 			&& !getInput().get(index).areDimsBelowThreshold() 
 			&& getInput().get(index).optFindExecType() == ExecType.SPARK
@@ -479,15 +479,13 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop in1 = getInput().get(0);
 		Hop in2 = getInput().get(1);
 		
-		if(    in1 instanceof ReorgOp 
-			&& ((ReorgOp)in1).getOp() == ReOrgOp.TRANSPOSE 
+		if( HopRewriteUtils.isTransposeOperation(in1)
 			&& in1.getInput().get(0) == in2 )
 		{
 			ret = MMTSJType.LEFT;
 		}
 		
-		if(    in2 instanceof ReorgOp 
-			&& ((ReorgOp)in2).getOp() == ReOrgOp.TRANSPOSE 
+		if( HopRewriteUtils.isTransposeOperation(in2) 
 			&& in2.getInput().get(0) == in1 )
 		{
 			ret = MMTSJType.RIGHT;
@@ -510,7 +508,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop in2 = getInput().get(1);
 		
 		//check for transpose left input (both chain types)
-		if( in1 instanceof ReorgOp && ((ReorgOp)in1).getOp() == ReOrgOp.TRANSPOSE )
+		if( HopRewriteUtils.isTransposeOperation(in1) )
 		{
 			Hop X = in1.getInput().get(0);
 				
@@ -615,7 +613,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop rightInput = getInput().get(1);
 		
 		Hop nrow = HopRewriteUtils.createValueHop(pmInput, true); //NROW
-		HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+		nrow.setOutputBlocksizes(0, 0);
 		nrow.setForcedExecType(ExecType.CP);
 		HopRewriteUtils.copyLineNumbers(this, nrow);
 		Lop lnrow = nrow.constructLops();
@@ -644,7 +642,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			Hop h2 = getInput().get(1);
 			Lop left; Lop right;
 			boolean isLeftTransposed; boolean isRightTransposed;
-			if( h1 instanceof ReorgOp && ((ReorgOp)h1).getOp()==ReOrgOp.TRANSPOSE ) {
+			if( HopRewriteUtils.isTransposeOperation(h1) ) {
 				isLeftTransposed = true;
 				left = h1.getInput().get(0).constructLops();
 			}
@@ -652,7 +650,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 				isLeftTransposed = false;
 				left = h1.constructLops();
 			}
-			if( h2 instanceof ReorgOp && ((ReorgOp)h2).getOp()==ReOrgOp.TRANSPOSE ) {
+			if( HopRewriteUtils.isTransposeOperation(h2) ) {
 				isRightTransposed = true;
 				right = h2.getInput().get(0).constructLops();
 			}
@@ -872,8 +870,6 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		
 		Hop pmInput = getInput().get(0);
 		Hop rightInput = getInput().get(1);
-		long brlen = pmInput.getRowsInBlock();
-		long bclen = pmInput.getColsInBlock();
 		
 		Lop lpmInput = pmInput.constructLops();
 		Hop nrow = null;
@@ -887,30 +883,19 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			//v = rowMaxIndex(t(pm)) * rowMax(t(pm)) 
 			ReorgOp transpose = HopRewriteUtils.createTranspose(pmInput);
 			transpose.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, transpose);	
 			
-			AggUnaryOp agg1 = new AggUnaryOp("tmp2a", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAXINDEX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg1, brlen, bclen);
-			agg1.refreshSizeInformation();
+			AggUnaryOp agg1 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAXINDEX, Direction.Row);
 			agg1.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, agg1);
 			
-			AggUnaryOp agg2 = new AggUnaryOp("tmp2b", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg2, brlen, bclen);
-			agg2.refreshSizeInformation();
+			AggUnaryOp agg2 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAX, Direction.Row);
 			agg2.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, agg2);
 			
-			BinaryOp mult = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, agg1, agg2);
-			HopRewriteUtils.setOutputBlocksizes(mult, brlen, bclen); 
-			mult.refreshSizeInformation();
+			BinaryOp mult = HopRewriteUtils.createBinary(agg1, agg2, OpOp2.MULT);
 			mult.setForcedExecType(ExecType.SPARK);
-			//mult.computeMemEstimate(memo); //select exec type
-			HopRewriteUtils.copyLineNumbers(this, mult);
 			
 			//compute NROW target via nrow(m)
 			nrow = HopRewriteUtils.createValueHop(pmInput, true);
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(ExecType.CP);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 			
@@ -921,7 +906,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		{
 			//compute NROW target via max(v)
 			nrow = HopRewriteUtils.createAggUnaryOp(pmInput, AggOp.MAX, Direction.RowCol); 
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(etVect);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 		}
@@ -1239,8 +1224,6 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		
 		Hop pmInput = getInput().get(0);
 		Hop rightInput = getInput().get(1);
-		long brlen = pmInput.getRowsInBlock();
-		long bclen = pmInput.getColsInBlock();
 		
 		Lop lpmInput = pmInput.constructLops();
 		Hop nrow = null;
@@ -1254,29 +1237,19 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			//v = rowMaxIndex(t(pm)) * rowMax(t(pm)) 
 			ReorgOp transpose = HopRewriteUtils.createTranspose(pmInput);
 			transpose.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, transpose);	
 			
-			AggUnaryOp agg1 = new AggUnaryOp("tmp2a", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAXINDEX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg1, brlen, bclen);
-			agg1.refreshSizeInformation();
+			AggUnaryOp agg1 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAXINDEX, Direction.Row);
 			agg1.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, agg1);
 			
-			AggUnaryOp agg2 = new AggUnaryOp("tmp2b", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg2, brlen, bclen);
-			agg2.refreshSizeInformation();
+			AggUnaryOp agg2 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAX, Direction.Row);
 			agg2.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, agg2);
 			
-			BinaryOp mult = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, agg1, agg2);
-			HopRewriteUtils.setOutputBlocksizes(mult, brlen, bclen); 
-			mult.refreshSizeInformation();
+			BinaryOp mult = HopRewriteUtils.createBinary(agg1, agg2, OpOp2.MULT);
 			mult.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, mult);
 			
 			//compute NROW target via nrow(m)
 			nrow = HopRewriteUtils.createValueHop(pmInput, true);
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(ExecType.CP);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 				
@@ -1287,7 +1260,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		{
 			//compute NROW target via max(v)
 			nrow = HopRewriteUtils.createAggUnaryOp(pmInput, AggOp.MAX, Direction.RowCol); 
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(etVect);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 		}
@@ -1345,7 +1318,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		//even a ba in CP does not imply that both transposes can be executed in CP)
 		if( CP ) //in-memory ba 
 		{
-			if( h1 instanceof ReorgOp && ((ReorgOp)h1).getOp()==ReOrgOp.TRANSPOSE )
+			if( HopRewriteUtils.isTransposeOperation(h1) )
 			{
 				long m = h1.getDim1();
 				long cd = h1.getDim2();
@@ -1861,8 +1834,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop input1 = getInput().get(0);
 		Hop input2 = getInput().get(1);
 		
-		if( isMatrixMultiply() )
-		{
+		if( isMatrixMultiply() ) {
 			setDim1(input1.getDim1());
 			setDim2(input2.getDim2());
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index e2ec190..72e9115 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -512,11 +512,11 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				//step1: compute index vectors
 				Hop ppred0 = input;
 				if( !isPPredInput ) { //ppred only if required
-					ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
+					ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 					HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
 				}
 				
-				UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0); 
+				UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM); 
 				HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
 			
 				Lop loutput = null;
@@ -524,14 +524,14 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
 				if( _outputPermutationMatrix && mest < mbudget ) //SPECIAL CASE: SELECTION VECTOR
 				{
-					BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
+					BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
 					HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
 					loutput = sel.constructLops();
 				}
 				else //GENERAL CASE: GENERAL PERMUTATION MATRIX
 				{
 					//max ensures non-zero entries and at least one output row
-					BinaryOp max = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MAX, cumsum, new LiteralOp(1));
+					BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
 					HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
 					
 					DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
@@ -541,7 +541,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					//step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
 					//note: weights always the input (even if isPPredInput) because input also includes 0s
 					TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
-					HopRewriteUtils.setOutputBlocksizes(table, brlen, bclen);
+					table.setOutputBlocksizes(brlen, bclen);
 					table.refreshSizeInformation();
 					table.setForcedExecType(ExecType.MR); //force MR 
 					HopRewriteUtils.copyLineNumbers(this, table);
@@ -581,23 +581,18 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				
 				if(selectHop == null) {
 					//Step1: compute row/col non-empty indicators 
-					ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
-					HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
-					ppred0.refreshSizeInformation();
+					ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 					ppred0.setForcedExecType(ExecType.MR); //always MR 
-					HopRewriteUtils.copyLineNumbers(this, ppred0);
 					
 					emptyInd = ppred0;
 					if( !((rmRows && clen == 1) || (!rmRows && rlen==1)) ){
-						emptyInd = new AggUnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, rmRows?Direction.Row:Direction.Col, ppred0);
-						HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
-						emptyInd.refreshSizeInformation();
+						emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows?Direction.Row:Direction.Col);
 						emptyInd.setForcedExecType(ExecType.MR); //always MR
 						HopRewriteUtils.copyLineNumbers(this, emptyInd);
 					}
 				} else {
 					emptyInd = selectHop;
-					HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
+					emptyInd.setOutputBlocksizes(brlen, bclen);
 					emptyInd.refreshSizeInformation();
 					emptyInd.setForcedExecType(ExecType.MR); //always MR
 					HopRewriteUtils.copyLineNumbers(this, emptyInd);
@@ -610,7 +605,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);	
 				}
 			
-				UnaryOp cumsum = new UnaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, cumsumInput); 
+				UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM); 
 				HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
 			
 				Hop cumsumOutput = cumsum;
@@ -619,10 +614,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);	
 				}
 				
-				Hop maxDim = new AggUnaryOp("tmp4", DataType.SCALAR, ValueType.DOUBLE, AggOp.MAX, Direction.RowCol, cumsumOutput); //alternative: right indexing
+				Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol); //alternative: right indexing
 				HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
 				
-				BinaryOp offsets = new BinaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cumsumOutput, emptyInd);
+				BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
 				HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
 				
 				//Step 3: gather non-empty rows/cols into final results 
@@ -713,23 +708,17 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			
 			if(selectHop == null) {
 				//Step1: compute row/col non-empty indicators 
-				ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
-				HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
-				ppred0.refreshSizeInformation();
+				ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 				ppred0.setForcedExecType(ExecType.SPARK); //always Spark
-				HopRewriteUtils.copyLineNumbers(this, ppred0);
 				
 				emptyInd = ppred0;
 				if( !((rmRows && clen == 1) || (!rmRows && rlen==1)) ){
-					emptyInd = new AggUnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, rmRows?Direction.Row:Direction.Col, ppred0);
-					HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
-					emptyInd.refreshSizeInformation();
+					emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows?Direction.Row:Direction.Col);
 					emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
-					HopRewriteUtils.copyLineNumbers(this, emptyInd);
 				}
 			} else {
 				emptyInd = selectHop;
-				HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
+				emptyInd.setOutputBlocksizes(brlen, bclen);
 				emptyInd.refreshSizeInformation();
 				emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
 				HopRewriteUtils.copyLineNumbers(this, emptyInd);
@@ -742,7 +731,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
 			}
 		
-			UnaryOp cumsum = new UnaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, cumsumInput); 
+			UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM); 
 			HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
 		
 			Hop cumsumOutput = cumsum;
@@ -751,10 +740,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);	
 			}
 			
-			Hop maxDim = new AggUnaryOp("tmp4", DataType.SCALAR, ValueType.DOUBLE, AggOp.MAX, Direction.RowCol, cumsumOutput); //alternative: right indexing
+			Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol); //alternative: right indexing
 			HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
 			
-			BinaryOp offsets = new BinaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cumsumOutput, emptyInd);
+			BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
 			HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
 			
 			//Step 3: gather non-empty rows/cols into final results 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index abe03a8..8d0b4b4 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -256,7 +256,7 @@ public class ReorgOp extends Hop implements MultiThreadedHop
 						vinput = new IndexingOp("tmp1", getDataType(), getValueType(), input, new LiteralOp(1L), 
 								HopRewriteUtils.createValueHop(input, true), by, by, false, true);
 						vinput.refreshSizeInformation();
-						HopRewriteUtils.setOutputBlocksizes(vinput, getRowsInBlock(), getColsInBlock());
+						vinput.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
 						HopRewriteUtils.copyLineNumbers(this, vinput);	
 					}
 					
@@ -314,7 +314,7 @@ public class ReorgOp extends Hop implements MultiThreadedHop
 						
 						//generate table
 						TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, seq, voutput, new LiteralOp(1L) );
-						HopRewriteUtils.setOutputBlocksizes(table, getRowsInBlock(), getColsInBlock());
+						table.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
 						table.refreshSizeInformation();
 						table.setForcedExecType(ExecType.MR); //force MR 
 						HopRewriteUtils.copyLineNumbers(this, table);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
index e87a911..e385a86 100644
--- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
+++ b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
@@ -28,9 +28,8 @@ import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.FileFormatTypes;
 import org.apache.sysml.hops.Hop.OpOp1;
-import org.apache.sysml.hops.Hop.ReOrgOp;
-import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.UnaryOp;
+import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.ProgramBlock;
@@ -149,7 +148,7 @@ public class GDFNode
 	{
 		return (   _hop instanceof UnaryOp && format!=FileFormatTypes.CSV
 				|| (_hop instanceof AggUnaryOp && ((AggUnaryOp)_hop).getDirection()==Direction.RowCol && format!=FileFormatTypes.CSV)
-				|| (_hop instanceof ReorgOp && ((ReorgOp)_hop).getOp()==ReOrgOp.TRANSPOSE && format!=FileFormatTypes.CSV)
+				|| (HopRewriteUtils.isTransposeOperation(_hop) && format!=FileFormatTypes.CSV)
 				|| format==FileFormatTypes.BINARY ); //any op
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index d3be09d..7f65ddd 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
 
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
@@ -222,16 +223,12 @@ public class HopRewriteUtils
 		return childs.indexOf(child);
 	}
 	
-	public static void removeChildReference( Hop parent, Hop child )
-	{
-		//remove child reference
+	public static void removeChildReference( Hop parent, Hop child ) {
 		parent.getInput().remove( child );
 		child.getParent().remove( parent );
 	}
 	
-	public static void removeChildReferenceByPos( Hop parent, Hop child, int posChild )
-	{
-		//remove child reference
+	public static void removeChildReferenceByPos( Hop parent, Hop child, int posChild ) {
 		parent.getInput().remove( posChild );
 		child.getParent().remove( parent );
 	}
@@ -246,18 +243,35 @@ public class HopRewriteUtils
 		parent.getInput().clear();
 	}
 	
-	public static void addChildReference( Hop parent, Hop child )
-	{
+	public static void addChildReference( Hop parent, Hop child ) {
 		parent.getInput().add( child );
 		child.getParent().add( parent );
 	}
 	
-	public static void addChildReference( Hop parent, Hop child, int pos )
-	{
+	public static void addChildReference( Hop parent, Hop child, int pos ){
 		parent.getInput().add( pos, child );
 		child.getParent().add( parent );
 	}
 	
+	public static void replaceChildReference( Hop parent, Hop inOld, Hop inNew ) {
+		int pos = getChildReferencePos(parent, inOld);
+		removeChildReferenceByPos(parent, inOld, pos);
+		addChildReference(parent, inNew, pos);
+		parent.refreshSizeInformation();
+	}
+	
+	public static void replaceChildReference( Hop parent, Hop inOld, Hop inNew, int pos ) {
+		removeChildReferenceByPos(parent, inOld, pos);
+		addChildReference(parent, inNew, pos);
+		parent.refreshSizeInformation();
+	}
+	
+	public static void cleanupUnreferenced( Hop... inputs ) {
+		for( Hop input : inputs )
+			if( input.getParent().isEmpty() )
+				removeAllChildReferences(input);
+	}
+	
 	public static Hop createDataGenOp( Hop input, double value ) 
 		throws HopsException
 	{		
@@ -279,8 +293,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(input.getRowsInBlock());
-		datagen.setColsInBlock(input.getColsInBlock());
+		datagen.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -335,12 +349,11 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		DataGenOp datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params2);
-		datagen.setRowsInBlock(inputGen.getRowsInBlock());
-		datagen.setColsInBlock(inputGen.getColsInBlock());
+		datagen.setOutputBlocksizes(inputGen.getRowsInBlock(), inputGen.getColsInBlock());
+		copyLineNumbers(inputGen, datagen);
 		
-		if( smin==0 && smax==0 ) {
+		if( smin==0 && smax==0 )
 			datagen.setNnz(0);
-		}
 			
 		return datagen;
 	}
@@ -366,8 +379,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -399,8 +412,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -425,8 +438,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -441,8 +454,8 @@ public class HopRewriteUtils
 	public static ReorgOp createReorg(Hop input, ReOrgOp rop)
 	{
 		ReorgOp transpose = new ReorgOp(input.getName(), input.getDataType(), input.getValueType(), rop, input);
-		HopRewriteUtils.setOutputBlocksizes(transpose, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, transpose);
+		transpose.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, transpose);
 		transpose.refreshSizeInformation();	
 		
 		return transpose;
@@ -451,31 +464,35 @@ public class HopRewriteUtils
 	public static UnaryOp createUnary(Hop input, OpOp1 type) 
 		throws HopsException
 	{
-		DataType dt = (type==OpOp1.CAST_AS_SCALAR) ? DataType.SCALAR : input.getDataType();
-		UnaryOp unary = new UnaryOp(input.getName(), dt, input.getValueType(), type, input);
-		HopRewriteUtils.setOutputBlocksizes(unary, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, unary);
+		DataType dt = (type==OpOp1.CAST_AS_SCALAR) ? DataType.SCALAR : 
+			(type==OpOp1.CAST_AS_MATRIX) ? DataType.MATRIX : input.getDataType();
+		ValueType vt = (type==OpOp1.CAST_AS_MATRIX) ? ValueType.DOUBLE : input.getValueType();
+		UnaryOp unary = new UnaryOp(input.getName(), dt, vt, type, input);
+		unary.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		if( type == OpOp1.CAST_AS_SCALAR || type == OpOp1.CAST_AS_MATRIX ) {
+			int dim = (type==OpOp1.CAST_AS_SCALAR) ? 0 : 1;
+			int blksz = (type==OpOp1.CAST_AS_SCALAR) ? 0 : ConfigurationManager.getBlocksize();
+			setOutputParameters(unary, dim, dim, blksz, blksz, -1);		
+		}
+		
+		copyLineNumbers(input, unary);
 		unary.refreshSizeInformation();	
 		
 		return unary;
 	}
 	
-	public static BinaryOp createMinus(Hop input)
-	{
-		BinaryOp minus = new BinaryOp(input.getName(), input.getDataType(), input.getValueType(), OpOp2.MINUS, new LiteralOp(0), input);
-		HopRewriteUtils.setOutputBlocksizes(minus, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, minus);
-		minus.refreshSizeInformation();	
-		
-		return minus;
+	public static BinaryOp createBinaryMinus(Hop input) {
+		return createBinary(new LiteralOp(0), input, OpOp2.MINUS);
 	}
 	
 	public static BinaryOp createBinary(Hop input1, Hop input2, OpOp2 op)
 	{
-		BinaryOp bop = new BinaryOp(input1.getName(), input1.getDataType(), 
-				input1.getValueType(), op, input1, input2);
-		HopRewriteUtils.setOutputBlocksizes(bop, input1.getRowsInBlock(), input1.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input1, bop);
+		Hop mainInput = input1.getDataType().isMatrix() ? input1 : 
+			input2.getDataType().isMatrix() ? input2 : input1;
+		BinaryOp bop = new BinaryOp(mainInput.getName(), mainInput.getDataType(), 
+			mainInput.getValueType(), op, input1, input2);
+		bop.setOutputBlocksizes(mainInput.getRowsInBlock(), mainInput.getColsInBlock());
+		copyLineNumbers(mainInput, bop);
 		bop.refreshSizeInformation();	
 		
 		return bop;
@@ -485,23 +502,20 @@ public class HopRewriteUtils
 		return createAggUnaryOp(input, AggOp.SUM, Direction.RowCol);
 	}
 	
-	public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir )
-	{
+	public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir ) {
 		DataType dt = (dir==Direction.RowCol) ? DataType.SCALAR : input.getDataType();
-		
 		AggUnaryOp auop = new AggUnaryOp(input.getName(), dt, input.getValueType(), op, dir, input);
-		auop.setRowsInBlock(input.getRowsInBlock());
-		auop.setColsInBlock(input.getColsInBlock());
+		auop.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, auop);
 		auop.refreshSizeInformation();
 		
 		return auop;
 	}
 	
-	public static AggBinaryOp createMatrixMultiply(Hop left, Hop right)
-	{
+	public static AggBinaryOp createMatrixMultiply(Hop left, Hop right) {
 		AggBinaryOp mmult = new AggBinaryOp(left.getName(), left.getDataType(), left.getValueType(), OpOp2.MULT, AggOp.SUM, left, right);
-		mmult.setRowsInBlock(left.getRowsInBlock());
-		mmult.setColsInBlock(right.getColsInBlock());
+		mmult.setOutputBlocksizes(left.getRowsInBlock(), right.getColsInBlock());
+		copyLineNumbers(left, mmult);
 		mmult.refreshSizeInformation();
 		
 		return mmult;
@@ -550,55 +564,42 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		DataGenOp datagen = new DataGenOp(DataGenMethod.SEQ, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(input.getRowsInBlock());
-		datagen.setColsInBlock(input.getColsInBlock());
+		datagen.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, datagen);
 		
 		return datagen;
 	}
 	
 	public static TernaryOp createTernaryOp(Hop mleft, Hop smid, Hop mright, OpOp3 op) {
 		TernaryOp ternOp = new TernaryOp("tmp", DataType.MATRIX, ValueType.DOUBLE, op, mleft, smid, mright);
-		ternOp.setRowsInBlock(mleft.getRowsInBlock());
-		ternOp.setColsInBlock(mleft.getColsInBlock());
+		ternOp.setOutputBlocksizes(mleft.getRowsInBlock(), mleft.getColsInBlock());
+		copyLineNumbers(mleft, ternOp);
 		ternOp.refreshSizeInformation();
 		return ternOp;
 	}
 	
-	public static void setOutputBlocksizes( Hop hop, long brlen, long bclen )
-	{
-		hop.setRowsInBlock( brlen );
-		hop.setColsInBlock( bclen );
-	}
-	
-	public static void setOutputParameters( Hop hop, long rlen, long clen, long brlen, long bclen, long nnz )
-	{
+	public static void setOutputParameters( Hop hop, long rlen, long clen, long brlen, long bclen, long nnz ) {
 		hop.setDim1( rlen );
 		hop.setDim2( clen );
-		hop.setRowsInBlock( brlen );
-		hop.setColsInBlock( bclen );
+		hop.setOutputBlocksizes(brlen, bclen );
 		hop.setNnz( nnz );
 	}
 	
-	public static void setOutputParametersForScalar( Hop hop )
-	{
+	public static void setOutputParametersForScalar( Hop hop ) {
 		hop.setDim1( 0 );
 		hop.setDim2( 0 );
-		hop.setRowsInBlock( -1 );
-		hop.setColsInBlock( -1 );
+		hop.setOutputBlocksizes(-1, -1 );
 		hop.setNnz( -1 );
 	}
 	
-	public static void refreshOutputParameters( Hop hnew, Hop hold )
-	{
+	public static void refreshOutputParameters( Hop hnew, Hop hold ) {
 		hnew.setDim1( hold.getDim1() );
 		hnew.setDim2( hold.getDim2() );
-		hnew.setRowsInBlock(hold.getRowsInBlock());
-		hnew.setColsInBlock(hold.getColsInBlock());
+		hnew.setOutputBlocksizes(hold.getRowsInBlock(), hold.getColsInBlock());
 		hnew.refreshSizeInformation();
 	}
 	
-	public static void copyLineNumbers( Hop src, Hop dest )
-	{
+	public static void copyLineNumbers( Hop src, Hop dest ) {
 		dest.setAllPositions(src.getBeginLine(), src.getBeginColumn(), src.getEndLine(), src.getEndColumn());
 	}
 	
@@ -610,7 +611,7 @@ public class HopRewriteUtils
 	public static void updateHopCharacteristics( Hop hop, long brlen, long bclen, MemoTable memo, Hop src )
 	{
 		//update block sizes and dimensions  
-		setOutputBlocksizes(hop, brlen, bclen);
+		hop.setOutputBlocksizes(brlen, bclen);
 		hop.refreshSizeInformation();
 		
 		//compute memory estimates (for exec type selection)
@@ -668,7 +669,7 @@ public class HopRewriteUtils
 	}
 	
 	public static boolean isOuterProductLikeMM( Hop hop ) {
-		return hop instanceof AggBinaryOp
+		return isMatrixMultiply(hop)
 			&& hop.getInput().get(0).getDim1() > hop.getInput().get(0).getDim2()
 			&& hop.getInput().get(1).getDim1() < hop.getInput().get(1).getDim2();
 	}
@@ -715,6 +716,10 @@ public class HopRewriteUtils
 		return (hop instanceof ReorgOp && ((ReorgOp)hop).getOp()==ReOrgOp.TRANSPOSE);
 	}
 	
+	public static boolean isTransposeOperation(Hop hop, int maxParents) {
+		return isTransposeOperation(hop) && hop.getParent().size() <= maxParents;
+	}
+	
 	public static boolean containsTransposeOperation(ArrayList<Hop> hops) {
 		boolean ret = false;
 		for( Hop hop : hops )
@@ -723,8 +728,16 @@ public class HopRewriteUtils
 	}
 	
 	public static boolean isTransposeOfItself(Hop hop1, Hop hop2) {
-		return hop1 instanceof ReorgOp && ((ReorgOp)hop1).getOp()==ReOrgOp.TRANSPOSE && hop1.getInput().get(0) == hop2
-			|| hop2 instanceof ReorgOp && ((ReorgOp)hop2).getOp()==ReOrgOp.TRANSPOSE && hop2.getInput().get(0) == hop1;	
+		return isTransposeOperation(hop1) && hop1.getInput().get(0) == hop2
+			|| isTransposeOperation(hop2) && hop2.getInput().get(0) == hop1;	
+	}
+	
+	public static boolean isBinary(Hop hop, OpOp2 type) {
+		return hop instanceof BinaryOp && ((BinaryOp)hop).getOp()==type;
+	}
+	
+	public static boolean isBinary(Hop hop, OpOp2 type, int maxParents) {
+		return isBinary(hop, type) && hop.getParent().size() <= maxParents;
 	}
 	
 	public static boolean isBinaryMatrixScalarOperation(Hop hop) {
@@ -733,6 +746,18 @@ public class HopRewriteUtils
 			||(hop.getInput().get(1).getDataType().isMatrix() && hop.getInput().get(0).getDataType().isScalar()));
 	}
 	
+	public static boolean isUnary(Hop hop, OpOp1 type) {
+		return hop instanceof UnaryOp && ((UnaryOp)hop).getOp()==type;
+	}
+	
+	public static boolean isUnary(Hop hop, OpOp1 type, int maxParents) {
+		return isUnary(hop, type) && hop.getParent().size() <= maxParents;
+	}
+	
+	public static boolean isMatrixMultiply(Hop hop) {
+		return hop instanceof AggBinaryOp && ((AggBinaryOp)hop).isMatrixMultiply();
+	}
+	
 	public static boolean isNonZeroIndicator(Hop pred, Hop hop )
 	{
 		if( pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL