You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/24 20:27:33 UTC

[1/6] incubator-systemml git commit: [SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4316efeba -> 2f7fa8d73


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
index 41459b4..2ae27c8 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
@@ -199,11 +199,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					if( dright.getOp()==DataGenMethod.RAND && dright.hasConstantValue() )
 					{
 						Hop drightIn = dright.getInput().get(dright.getParamIndex(DataExpression.RAND_MIN));
-						HopRewriteUtils.removeChildReference(bop, dright);
-						HopRewriteUtils.addChildReference(bop, drightIn, 1);
-						//cleanup if only consumer of intermediate
-						if( dright.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences( dright );
+						HopRewriteUtils.replaceChildReference(bop, dright, drightIn, 1);
+						HopRewriteUtils.cleanupUnreferenced(dright);
 						
 						LOG.debug("Applied removeUnnecessaryVectorizeOperation1");
 					}
@@ -217,11 +214,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						&& (left.getDim1()==1 || right.getDim1()>1))
 					{
 						Hop dleftIn = dleft.getInput().get(dleft.getParamIndex(DataExpression.RAND_MIN));
-						HopRewriteUtils.removeChildReference(bop, dleft);
-						HopRewriteUtils.addChildReference(bop, dleftIn, 0);
-						//cleanup if only consumer of intermediate
-						if( dleft.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences( dleft );
+						HopRewriteUtils.replaceChildReference(bop, dleft, dleftIn, 0);
+						HopRewriteUtils.cleanupUnreferenced(dleft);
 						
 						LOG.debug("Applied removeUnnecessaryVectorizeOperation2");
 					}
@@ -264,8 +258,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				if( bop.getOp()==OpOp2.DIV || bop.getOp()==OpOp2.MULT )
 				{
-					HopRewriteUtils.removeChildReference(parent, bop);
-					HopRewriteUtils.addChildReference(parent, left, pos);
+					HopRewriteUtils.replaceChildReference(parent, bop, left, pos);
 					hi = left;
 
 					LOG.debug("Applied removeUnnecessaryBinaryOperation1 (line "+bop.getBeginLine()+")");
@@ -277,8 +270,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				if( bop.getOp()==OpOp2.MINUS )
 				{
-					HopRewriteUtils.removeChildReference(parent, bop);
-					HopRewriteUtils.addChildReference(parent, left, pos);
+					HopRewriteUtils.replaceChildReference(parent, bop, left, pos);
 					hi = left;
 
 					LOG.debug("Applied removeUnnecessaryBinaryOperation2 (line "+bop.getBeginLine()+")");
@@ -290,8 +282,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				if( bop.getOp()==OpOp2.MULT )
 				{
-					HopRewriteUtils.removeChildReference(parent, bop);
-					HopRewriteUtils.addChildReference(parent, right, pos);
+					HopRewriteUtils.replaceChildReference(parent, bop, right, pos);
 					hi = right;
 
 					LOG.debug("Applied removeUnnecessaryBinaryOperation3 (line "+bop.getBeginLine()+")");
@@ -306,8 +297,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				if( bop.getOp()==OpOp2.MULT )
 				{
 					bop.setOp(OpOp2.MINUS);
-					HopRewriteUtils.removeChildReferenceByPos(bop, left, 0);
-					HopRewriteUtils.addChildReference(bop, new LiteralOp(0), 0);
+					HopRewriteUtils.replaceChildReference(bop, left, new LiteralOp(0), 0);
 					hi = bop;
 
 					LOG.debug("Applied removeUnnecessaryBinaryOperation4 (line "+bop.getBeginLine()+")");
@@ -380,13 +370,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						
 					//rewire all parents (avoid anomalies with replicated datagen)
 					List<Hop> parents = new ArrayList<Hop>(bop.getParent());
-					for( Hop p : parents ) {
-						int cpos = HopRewriteUtils.getChildReferencePos(p, bop);
-						HopRewriteUtils.removeChildReferenceByPos(p, bop, cpos);
-						HopRewriteUtils.addChildReference(p, gen, cpos);
-						//propagate potentially updated nnz=0
-						p.refreshSizeInformation();
-					}
+					for( Hop p : parents )
+						HopRewriteUtils.replaceChildReference(p, bop, gen);
 					
 					hi = gen;
 					LOG.debug("Applied fuseDatagenAndBinaryOperation1 (line "+bop.getBeginLine()+").");
@@ -417,13 +402,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					
 					//rewire all parents (avoid anomalies with replicated datagen)
 					List<Hop> parents = new ArrayList<Hop>(bop.getParent());
-					for( Hop p : parents ) {
-						int cpos = HopRewriteUtils.getChildReferencePos(p, bop);
-						HopRewriteUtils.removeChildReferenceByPos(p, bop, cpos);
-						HopRewriteUtils.addChildReference(p, gen, cpos);
-						//propagate potentially updated nnz=0
-						p.refreshSizeInformation();
-					}
+					for( Hop p : parents )
+						HopRewriteUtils.replaceChildReference(p, bop, gen);
 					
 					hi = gen;
 					LOG.debug("Applied fuseDatagenAndBinaryOperation2 (line "+bop.getBeginLine()+").");
@@ -472,13 +452,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					
 					//rewire all parents (avoid anomalies with replicated datagen)
 					List<Hop> parents = new ArrayList<Hop>(bop.getParent());
-					for( Hop p : parents ) {
-						int cpos = HopRewriteUtils.getChildReferencePos(p, bop);
-						HopRewriteUtils.removeChildReferenceByPos(p, bop, cpos);
-						HopRewriteUtils.addChildReference(p, inputGen, cpos);
-						//propagate potentially updated nnz=0
-						p.refreshSizeInformation();
-					}
+					for( Hop p : parents )
+						HopRewriteUtils.replaceChildReference(p, bop, inputGen);
 					
 					hi = inputGen;
 					LOG.debug("Applied fuseDatagenAndMinusOperation (line "+bop.getBeginLine()+").");		
@@ -538,8 +513,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			}
 			//patterns: (X>0)-(X<0) -> sign(X)
 			else if( bop.getOp() == OpOp2.MINUS 
-				&& left instanceof BinaryOp && right instanceof BinaryOp
-				&& ((BinaryOp)left).getOp()==OpOp2.GREATER && ((BinaryOp)right).getOp()==OpOp2.LESS 
+				&& HopRewriteUtils.isBinary(left, OpOp2.GREATER) 
+				&& HopRewriteUtils.isBinary(right, OpOp2.LESS) 
 				&& left.getInput().get(0) == right.getInput().get(0) 
 				&& left.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)left.getInput().get(1))==0
@@ -547,15 +522,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)right.getInput().get(1))==0 )
 			{
 				UnaryOp uop = HopRewriteUtils.createUnary(left.getInput().get(0), OpOp1.SIGN);
-				
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.removeAllChildReferences(hi);
-				HopRewriteUtils.addChildReference(parent, uop, pos);
-				if( left.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences(left);
-				if( right.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences(right);
-				
+				HopRewriteUtils.replaceChildReference(parent, hi, uop, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi, left, right);
 				hi = uop;
 				
 				LOG.debug("Applied simplifyBinaryToUnaryOperation3");
@@ -598,9 +566,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					&& bop.getOp() == OpOp2.MINUS )
 			{
 				bop.setOp(OpOp2.PLUS);
-				HopRewriteUtils.removeChildReferenceByPos(bop, right, 1);
-				HopRewriteUtils.addChildReference(bop, 
-						HopRewriteUtils.createBinary(new LiteralOp(0), right, OpOp2.MINUS), 1);				
+				HopRewriteUtils.replaceChildReference(bop,  right,
+						HopRewriteUtils.createBinaryMinus(right), 1);				
 				LOG.debug("Applied canonicalizeMatrixMultScalarAdd2 (line "+hi.getBeginLine()+").");
 			}
 		}
@@ -633,14 +600,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				&& top.getInput().get(0).getDim1()==top.getInput().get(1).getDim1())
 			{
 				ReorgOp rop = HopRewriteUtils.createReorg(hi.getInput().get(1), ReOrgOp.REV);
-				
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, rop, pos);
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences(hi);
-				if( top.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences(top);
-				
+				HopRewriteUtils.replaceChildReference(parent, hi, rop, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi, top);
 				hi = rop;
 				
 				LOG.debug("Applied simplifyReverseOperation.");
@@ -653,12 +614,11 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private Hop simplifyMultiBinaryToBinaryOperation( Hop hi )
 	{
 		//pattern: 1-(X*Y) --> X 1-* Y (avoid intermediate)
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==OpOp2.MINUS
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MINUS)
 			&& hi.getDataType() == DataType.MATRIX	
 			&& hi.getInput().get(0) instanceof LiteralOp
 			&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)hi.getInput().get(0))==1
-			&& hi.getInput().get(1) instanceof BinaryOp
-			&& ((BinaryOp)hi.getInput().get(1)).getOp()==OpOp2.MULT
+			&& HopRewriteUtils.isBinary(hi.getInput().get(1), OpOp2.MULT)
 			&& hi.getInput().get(1).getParent().size() == 1 ) //single consumer
 		{
 			BinaryOp bop = (BinaryOp)hi;
@@ -703,7 +663,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				&& HopRewriteUtils.isValidOp(bop.getOp(), LOOKUP_VALID_DISTRIBUTIVE_BINARY) )
 			{
 				Hop X = null; Hop Y = null;
-				if( left instanceof BinaryOp && ((BinaryOp)left).getOp()==OpOp2.MULT ) //(Y*X-X) -> (Y-1)*X
+				if( HopRewriteUtils.isBinary(left, OpOp2.MULT) ) //(Y*X-X) -> (Y-1)*X
 				{
 					Hop leftC1 = left.getInput().get(0);
 					Hop leftC2 = left.getInput().get(1);
@@ -717,10 +677,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					if( X != null ){ //rewrite 'binary +/-' 
 						HopRewriteUtils.removeChildReference(parent, hi);
 						LiteralOp literal = new LiteralOp(1);
-						BinaryOp plus = new BinaryOp(right.getName(), right.getDataType(), right.getValueType(), bop.getOp(), Y, literal);
-						HopRewriteUtils.refreshOutputParameters(plus, right);						
-						BinaryOp mult = new BinaryOp(left.getName(), left.getDataType(), left.getValueType(), OpOp2.MULT, plus, X);
-						HopRewriteUtils.refreshOutputParameters(mult, left);
+						BinaryOp plus = HopRewriteUtils.createBinary(Y, literal, bop.getOp());
+						BinaryOp mult = HopRewriteUtils.createBinary(plus, X, OpOp2.MULT);
 						
 						HopRewriteUtils.addChildReference(parent, mult, pos);							
 						hi = mult;
@@ -730,7 +688,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					}					
 				}	
 				
-				if( !applied && right instanceof BinaryOp && ((BinaryOp)right).getOp()==OpOp2.MULT ) //(X-Y*X) -> (1-Y)*X
+				if( !applied && HopRewriteUtils.isBinary(right, OpOp2.MULT) ) //(X-Y*X) -> (1-Y)*X
 				{
 					Hop rightC1 = right.getInput().get(0);
 					Hop rightC2 = right.getInput().get(1);
@@ -740,14 +698,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						Y = ( left == rightC1 ) ? rightC2 : rightC1;
 					}
 					if( X != null ){ //rewrite '+/- binary'
-						HopRewriteUtils.removeChildReference(parent, hi);
 						LiteralOp literal = new LiteralOp(1);
-						BinaryOp plus = new BinaryOp(left.getName(), left.getDataType(), left.getValueType(), bop.getOp(), literal, Y);
-						HopRewriteUtils.refreshOutputParameters(plus, left);						
-						BinaryOp mult = new BinaryOp(right.getName(), right.getDataType(), right.getValueType(), OpOp2.MULT, plus, X);
-						HopRewriteUtils.refreshOutputParameters(mult, right);
-						
-						HopRewriteUtils.addChildReference(parent, mult, pos);	
+						BinaryOp plus = HopRewriteUtils.createBinary(literal, Y, bop.getOp());
+						BinaryOp mult = HopRewriteUtils.createBinary(plus, X, OpOp2.MULT);
+						HopRewriteUtils.replaceChildReference(parent, hi, mult, pos);	
 						hi = mult;
 
 						LOG.debug("Applied simplifyDistributiveBinaryOperation2");
@@ -797,14 +751,9 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						&& (right2 instanceof AggBinaryOp) )
 					{
 						//(X*(Y*op()) -> (X*Y)*op()
-						HopRewriteUtils.removeChildReference(parent, bop);
-						
-						BinaryOp bop3 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, op, left, left2);
-						HopRewriteUtils.refreshOutputParameters(bop3, bop);
-						BinaryOp bop4 = new BinaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, op, bop3, right2);
-						HopRewriteUtils.refreshOutputParameters(bop4, bop2);
-						
-						HopRewriteUtils.addChildReference(parent, bop4, pos);	
+						BinaryOp bop3 = HopRewriteUtils.createBinary(left, left2, op);
+						BinaryOp bop4 = HopRewriteUtils.createBinary(bop3, right2, op);
+						HopRewriteUtils.replaceChildReference(parent, bop, bop4, pos);	
 						hi = bop4;
 						
 						applied = true;
@@ -828,10 +777,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						//((op()*X)*Y) -> op()*(X*Y)
 						HopRewriteUtils.removeChildReference(parent, bop);
 						
-						BinaryOp bop3 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, op, right2, right);
-						HopRewriteUtils.refreshOutputParameters(bop3, bop2);
-						BinaryOp bop4 = new BinaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, op, left2, bop3);
-						HopRewriteUtils.refreshOutputParameters(bop4, bop);
+						BinaryOp bop3 = HopRewriteUtils.createBinary(right2, right, op);
+						BinaryOp bop4 = HopRewriteUtils.createBinary(left2, bop3, op);
 						
 						HopRewriteUtils.addChildReference(parent, bop4, pos);	
 						hi = bop4;
@@ -871,7 +818,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private Hop simplifyBinaryMatrixScalarOperation( Hop parent, Hop hi, int pos ) 
 		throws HopsException
 	{
-		if(   hi instanceof UnaryOp && ((UnaryOp)hi).getOp()==OpOp1.CAST_AS_SCALAR  
+		if( HopRewriteUtils.isUnary(hi, OpOp1.CAST_AS_SCALAR)  
 		   && hi.getInput().get(0) instanceof BinaryOp ) 
 		{
 			BinaryOp bin = (BinaryOp) hi.getInput().get(0);
@@ -896,8 +843,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			}
 			
 			if( bout != null ) {
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, bout, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, bout, pos);
 				
 				LOG.debug("Applied simplifyBinaryMatrixScalarOperation.");
 			}
@@ -910,8 +856,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	{
 		if( hi instanceof AggUnaryOp && hi.getParent().size()==1 
 			&& (((AggUnaryOp) hi).getDirection()==Direction.Row || ((AggUnaryOp) hi).getDirection()==Direction.Col)	
-			&& hi.getInput().get(0) instanceof ReorgOp && hi.getInput().get(0).getParent().size()==1
-			&& ((ReorgOp)hi.getInput().get(0)).getOp()==ReOrgOp.TRANSPOSE
+			&& HopRewriteUtils.isTransposeOperation(hi.getInput().get(0), 1) 
 			&& HopRewriteUtils.isValidOp(((AggUnaryOp) hi).getOp(), LOOKUP_VALID_ROW_COL_AGGREGATE) )
 		{
 			AggUnaryOp uagg = (AggUnaryOp) hi;
@@ -949,7 +894,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		// a=t(X), b=t(X^2) -> a=t(X), b=t(X)^2 for CSE t(X)
 		// probed at root node of b in above example
 		// (with support for left or right scalar operations)
-		if( HopRewriteUtils.isTransposeOperation(hi) && hi.getParent().size()==1
+		if( HopRewriteUtils.isTransposeOperation(hi, 1) 
 			&& HopRewriteUtils.isBinaryMatrixScalarOperation(hi.getInput().get(0))
 			&& hi.getInput().get(0).getParent().size()==1) 
 		{
@@ -982,10 +927,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private Hop pushdownSumBinaryMult(Hop parent, Hop hi, int pos ) throws HopsException {
 		//pattern:  sum(lamda*X) -> lamda*sum(X)
 		if( hi instanceof AggUnaryOp && ((AggUnaryOp)hi).getDirection()==Direction.RowCol
-				&& ((AggUnaryOp)hi).getOp()==Hop.AggOp.SUM
-				&& ((AggUnaryOp)hi).getInput().get(0) instanceof BinaryOp
-				&& ((BinaryOp)hi.getInput().get(0)).getOp()==OpOp2.MULT
-				&& hi.getInput().get(0).getParent().size() == 1   // only one parent which is the sum
+				&& ((AggUnaryOp)hi).getOp()==Hop.AggOp.SUM
+				&& HopRewriteUtils.isBinary(hi.getInput().get(0), OpOp2.MULT, 1) // multiply with only one parent, which is the sum
 				&& ((hi.getInput().get(0).getInput().get(0).getDataType()==DataType.SCALAR && hi.getInput().get(0).getInput().get(1).getDataType()==DataType.MATRIX)
 					||(hi.getInput().get(0).getInput().get(0).getDataType()==DataType.MATRIX && hi.getInput().get(0).getInput().get(1).getDataType()==DataType.SCALAR)))
 		{
@@ -999,8 +942,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			AggUnaryOp aggOp=HopRewriteUtils.createAggUnaryOp(matrix, AggOp.SUM, Direction.RowCol);
 			Hop bop = HopRewriteUtils.createBinary(lamda, aggOp, OpOp2.MULT);
 			
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, bop, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, bop, pos);
 			
 			LOG.debug("Applied pushdownSumBinaryMult.");
 			return bop;
@@ -1021,10 +963,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				//clear link unary-binary
 				Hop input = uop.getInput().get(0);
-				HopRewriteUtils.removeAllChildReferences(hi);
-				
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, input, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi);
 				hi = input;
 				
 				LOG.debug("Applied simplifyUnaryPPredOperation.");	
@@ -1037,29 +977,25 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private Hop simplifyTransposedAppend( Hop parent, Hop hi, int pos )
 	{
 		//e.g., t(cbind(t(A),t(B))) --> rbind(A,B), t(rbind(t(A),t(B))) --> cbind(A,B)		
-		if(   hi instanceof ReorgOp && ((ReorgOp)hi).getOp()==ReOrgOp.TRANSPOSE  //t() rooted
+		if(   HopRewriteUtils.isTransposeOperation(hi)  //t() rooted
 		   && hi.getInput().get(0) instanceof BinaryOp
 		   && (((BinaryOp)hi.getInput().get(0)).getOp()==OpOp2.CBIND    //append (cbind/rbind)
 		    || ((BinaryOp)hi.getInput().get(0)).getOp()==OpOp2.RBIND) 
 		   && hi.getInput().get(0).getParent().size() == 1 ) //single consumer of append
 		{
 			BinaryOp bop = (BinaryOp)hi.getInput().get(0);
-			if( bop.getInput().get(0) instanceof ReorgOp  //both inputs transpose ops
-				&& ((ReorgOp)bop.getInput().get(0)).getOp()==ReOrgOp.TRANSPOSE
-				&& bop.getInput().get(0).getParent().size() == 1 //single consumer of transpose
-				&& bop.getInput().get(1) instanceof ReorgOp 
-				&& ((ReorgOp)bop.getInput().get(1)).getOp()==ReOrgOp.TRANSPOSE
-				&& bop.getInput().get(1).getParent().size() == 1 ) //single consumer of transpose
+			//both inputs transpose ops, where transpose is single consumer
+			if( HopRewriteUtils.isTransposeOperation(bop.getInput().get(0), 1)  
+				&& HopRewriteUtils.isTransposeOperation(bop.getInput().get(1), 1) )
 			{
 				Hop left = bop.getInput().get(0).getInput().get(0);
 				Hop right = bop.getInput().get(1).getInput().get(0);
 				
 				//create new subdag (no in-place dag update to prevent anomalies with
 				//multiple consumers during rewrite process)
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
 				OpOp2 binop = (bop.getOp()==OpOp2.CBIND) ? OpOp2.RBIND : OpOp2.CBIND;
 				BinaryOp bopnew = HopRewriteUtils.createBinary(left, right, binop);
-				HopRewriteUtils.addChildReference(parent, bopnew, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, bopnew, pos);
 				
 				hi = bopnew;
 				LOG.debug("Applied simplifyTransposedAppend (line "+hi.getBeginLine()+").");				
@@ -1109,15 +1045,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						left2 == right && bleft.getOp() == OpOp2.MINUS  ) 
 					{
 						UnaryOp unary = HopRewriteUtils.createUnary(right, OpOp1.SPROP);
-						HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-						HopRewriteUtils.addChildReference(parent, unary, pos);
-						
-						//cleanup if only consumer of intermediate
-						if( bop.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(bop);					
-						if( left.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences(left);
-						
+						HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+						HopRewriteUtils.cleanupUnreferenced(bop, left);
 						hi = unary;
 						
 						LOG.debug("Applied fuseBinarySubDAGToUnaryOperation-sprop1");
@@ -1134,15 +1063,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						right2 == left && bright.getOp() == OpOp2.MINUS )
 					{
 						UnaryOp unary = HopRewriteUtils.createUnary(left, OpOp1.SPROP);
-						HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-						HopRewriteUtils.addChildReference(parent, unary, pos);
-						
-						//cleanup if only consumer of intermediate
-						if( bop.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(bop);					
-						if( left.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences(right);
-						
+						HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+						HopRewriteUtils.cleanupUnreferenced(bop, right); //right is the (1-X) intermediate; left (X) now feeds the sprop unary
 						hi = unary;
 						
 						LOG.debug("Applied fuseBinarySubDAGToUnaryOperation-sprop2");
@@ -1172,37 +1094,24 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						UnaryOp unary = null;
 						
 						//Pattern 1: (1/(1 + exp(-X)) 
-						if( uopin instanceof BinaryOp && ((BinaryOp)uopin).getOp()==OpOp2.MINUS )
-						{
+						if( HopRewriteUtils.isBinary(uopin, OpOp2.MINUS) ) {
 							BinaryOp bop3 = (BinaryOp) uopin;
 							Hop left3 = bop3.getInput().get(0);
 							Hop right3 = bop3.getInput().get(1);
 							
-							if( left3 instanceof LiteralOp && HopRewriteUtils.getDoubleValue((LiteralOp)left3)==0 ) {
+							if( left3 instanceof LiteralOp && HopRewriteUtils.getDoubleValue((LiteralOp)left3)==0 )
 								unary = HopRewriteUtils.createUnary(right3, OpOp1.SIGMOID);
-							}	
 						}						
 						//Pattern 2: (1/(1 + exp(X)), e.g., where -(-X) has been removed by 
 						//the 'remove unnecessary minus' rewrite --> reintroduce the minus
-						else
-						{
-							BinaryOp minus = HopRewriteUtils.createMinus(uopin);
+						else {
+							BinaryOp minus = HopRewriteUtils.createBinaryMinus(uopin);
 							unary = HopRewriteUtils.createUnary(minus, OpOp1.SIGMOID);
 						}	
 					
-						if( unary != null )
-						{
-							HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-							HopRewriteUtils.addChildReference(parent, unary, pos);
-							
-							//cleanup if only consumer of intermediate
-							if( bop.getParent().isEmpty() )
-								HopRewriteUtils.removeAllChildReferences(bop);	
-							if( bop2.getParent().isEmpty() )
-								HopRewriteUtils.removeAllChildReferences(bop2);	
-							if( uop.getParent().isEmpty() )
-								HopRewriteUtils.removeAllChildReferences(uop);	
-							
+						if( unary != null ) {
+							HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+							HopRewriteUtils.cleanupUnreferenced(bop, bop2, uop);
 							hi = unary;
 							
 							LOG.debug("Applied fuseBinarySubDAGToUnaryOperation-sigmoid1");
@@ -1229,14 +1138,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						left1 == right && bleft.getOp() == OpOp2.GREATER  ) 
 					{
 						UnaryOp unary = HopRewriteUtils.createUnary(right, OpOp1.SELP);
-						HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-						HopRewriteUtils.addChildReference(parent, unary, pos);
-						
-						//cleanup if only consumer of intermediate
-						if( bop.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(bop);					
-						if( left.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences(left);
+						HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+						HopRewriteUtils.cleanupUnreferenced(bop, left);
 						
 						hi = unary;
 						applied = true;
@@ -1255,14 +1158,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 						right1 == left && bright.getOp() == OpOp2.GREATER )
 					{
 						UnaryOp unary = HopRewriteUtils.createUnary(left, OpOp1.SELP);
-						HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-						HopRewriteUtils.addChildReference(parent, unary, pos);
-						
-						//cleanup if only consumer of intermediate
-						if( bop.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(bop);					
-						if( left.getParent().isEmpty() ) 
-							HopRewriteUtils.removeAllChildReferences(right);
+						HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+						HopRewriteUtils.cleanupUnreferenced(bop, right); //right is the (X>0) intermediate; left (X) now feeds the selp unary
 						
 						hi = unary;
 						applied= true;
@@ -1277,12 +1174,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					&& right instanceof LiteralOp && HopRewriteUtils.getDoubleValue((LiteralOp)right)==0 )
 			{
 				UnaryOp unary = HopRewriteUtils.createUnary(left, OpOp1.SELP);
-				HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-				HopRewriteUtils.addChildReference(parent, unary, pos);
-				
-				//cleanup if only consumer of intermediate
-				if( bop.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences(bop);					
+				HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+				HopRewriteUtils.cleanupUnreferenced(bop);
 				hi = unary;
 				
 				LOG.debug("Applied fuseBinarySubDAGToUnaryOperation-selp3");
@@ -1293,12 +1186,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					&& left instanceof LiteralOp && HopRewriteUtils.getDoubleValue((LiteralOp)left)==0 )
 			{
 				UnaryOp unary = HopRewriteUtils.createUnary(right, OpOp1.SELP);
-				HopRewriteUtils.removeChildReferenceByPos(parent, bop, pos);
-				HopRewriteUtils.addChildReference(parent, unary, pos);
-				
-				//cleanup if only consumer of intermediate
-				if( bop.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences(bop);					
+				HopRewriteUtils.replaceChildReference(parent, bop, unary, pos);
+				HopRewriteUtils.cleanupUnreferenced(bop);
 				hi = unary;
 				
 				LOG.debug("Applied fuseBinarySubDAGToUnaryOperation-selp4");
@@ -1313,36 +1202,19 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		if( hi instanceof AggUnaryOp && ((AggUnaryOp)hi).getOp()==AggOp.TRACE ) //trace()
 		{
 			Hop hi2 = hi.getInput().get(0);
-			if( hi2 instanceof AggBinaryOp && ((AggBinaryOp)hi2).isMatrixMultiply() ) //X%*%Y
+			if( HopRewriteUtils.isMatrixMultiply(hi2) ) //X%*%Y
 			{
 				Hop left = hi2.getInput().get(0);
 				Hop right = hi2.getInput().get(1);
 				
-				//remove link from parent to diag
-				HopRewriteUtils.removeChildReference(parent, hi);
-				
-				//remove links to inputs to matrix mult
-				//removeChildReference(hi2, left);
-				//removeChildReference(hi2, right);
-				
 				//create new operators (incl refresh size inside for transpose)
 				ReorgOp trans = HopRewriteUtils.createTranspose(right);
-				BinaryOp mult = new BinaryOp(right.getName(), right.getDataType(), right.getValueType(), OpOp2.MULT, left, trans);
-				mult.setRowsInBlock(right.getRowsInBlock());
-				mult.setColsInBlock(right.getColsInBlock());
-				mult.refreshSizeInformation();
-				AggUnaryOp sum = new AggUnaryOp(right.getName(), DataType.SCALAR, right.getValueType(), AggOp.SUM, Direction.RowCol, mult);
-				sum.refreshSizeInformation();
+				BinaryOp mult = HopRewriteUtils.createBinary(left, trans, OpOp2.MULT);
+				AggUnaryOp sum = HopRewriteUtils.createSum(mult);
 				
 				//rehang new subdag under parent node
-				HopRewriteUtils.addChildReference(parent, sum, pos);
-				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
-				if( hi2.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi2 );
-				
+				HopRewriteUtils.replaceChildReference(parent, hi, sum, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi, hi2);
 				hi = sum;
 				
 				LOG.debug("Applied simplifyTraceMatrixMult");
@@ -1360,8 +1232,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			&& ((IndexingOp)hi).getRowLowerEqualsUpper()
 			&& ((IndexingOp)hi).getColLowerEqualsUpper()  
 			&& hi.getInput().get(0).getParent().size()==1 //rix is single mm consumer
-			&& hi.getInput().get(0) instanceof AggBinaryOp 
-			&& ((AggBinaryOp)hi.getInput().get(0)).isMatrixMultiply() )
+			&& HopRewriteUtils.isMatrixMultiply(hi.getInput().get(0)) )
 		{
 			Hop mm = hi.getInput().get(0);
 			Hop X = mm.getInput().get(0);
@@ -1374,11 +1245,11 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			//create new indexing operations
 			IndexingOp ix1 = new IndexingOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, X, 
 					rowExpr, rowExpr, new LiteralOp(1), HopRewriteUtils.createValueHop(X, false), true, false);
-			HopRewriteUtils.setOutputBlocksizes(ix1, X.getRowsInBlock(), X.getColsInBlock());
+			ix1.setOutputBlocksizes(X.getRowsInBlock(), X.getColsInBlock());
 			ix1.refreshSizeInformation();
 			IndexingOp ix2 = new IndexingOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, Y, 
 					new LiteralOp(1), HopRewriteUtils.createValueHop(Y, true), colExpr, colExpr, false, true);
-			HopRewriteUtils.setOutputBlocksizes(ix2, Y.getRowsInBlock(), Y.getColsInBlock());
+			ix2.setOutputBlocksizes(Y.getRowsInBlock(), Y.getColsInBlock());
 			ix2.refreshSizeInformation();
 			
 			//rewire matrix mult over ix1 and ix2
@@ -1410,12 +1281,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				if( HopRewriteUtils.getBooleanValue((LiteralOp)hi.getInput().get(3)) )
 				{
 					//order(matrix(7), indexreturn=TRUE) -> seq(1,nrow(X),1)
-					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
 					Hop seq = HopRewriteUtils.createSeqDataGenOp(hi2);
 					seq.refreshSizeInformation();
-					HopRewriteUtils.addChildReference(parent, seq, pos);
-					if( hi.getParent().isEmpty() )
-						HopRewriteUtils.removeChildReference(hi, hi2);
+					HopRewriteUtils.replaceChildReference(parent, hi, seq, pos);
+					HopRewriteUtils.cleanupUnreferenced(hi);
 					hi = seq;
 					
 					LOG.debug("Applied simplifyConstantSort1.");
@@ -1423,10 +1292,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				else
 				{
 					//order(matrix(7), indexreturn=FALSE) -> matrix(7)
-					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-					HopRewriteUtils.addChildReference(parent, hi2, pos);
-					if( hi.getParent().isEmpty() )
-						HopRewriteUtils.removeChildReference(hi, hi2);
+					HopRewriteUtils.replaceChildReference(parent, hi, hi2, pos);
+					HopRewriteUtils.cleanupUnreferenced(hi);
 					hi = hi2;
 					
 					LOG.debug("Applied simplifyConstantSort2.");
@@ -1458,12 +1325,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					{
 						//order(seq(2,N+1,1), indexreturn=TRUE) -> seq(1,N,1)/seq(N,1,-1)
 						boolean desc = HopRewriteUtils.getBooleanValue((LiteralOp)hi.getInput().get(2));
-						HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
 						Hop seq = HopRewriteUtils.createSeqDataGenOp(hi2, !desc);
 						seq.refreshSizeInformation();
-						HopRewriteUtils.addChildReference(parent, seq, pos);
-						if( hi.getParent().isEmpty() )
-							HopRewriteUtils.removeChildReference(hi, hi2);
+						HopRewriteUtils.replaceChildReference(parent, hi, seq, pos);
+						HopRewriteUtils.cleanupUnreferenced(hi);
 						hi = seq;
 						
 						LOG.debug("Applied simplifyOrderedSort1.");
@@ -1471,10 +1336,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					else if( !HopRewriteUtils.getBooleanValue((LiteralOp)hi.getInput().get(2)) ) //DATA, ASC
 					{
 						//order(seq(2,N+1,1), indexreturn=FALSE) -> seq(2,N+1,1)
-						HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-						HopRewriteUtils.addChildReference(parent, hi2, pos);
-						if( hi.getParent().isEmpty() )
-							HopRewriteUtils.removeChildReference(hi, hi2);
+						HopRewriteUtils.replaceChildReference(parent, hi, hi2, pos);
+						HopRewriteUtils.cleanupUnreferenced(hi);
 						hi = hi2;
 						
 						LOG.debug("Applied simplifyOrderedSort2.");
@@ -1498,7 +1361,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private Hop simplifyTransposeAggBinBinaryChains(Hop parent, Hop hi, int pos) 
 		throws HopsException
 	{
-		if( hi instanceof ReorgOp && ((ReorgOp)hi).getOp()==ReOrgOp.TRANSPOSE //transpose
+		if( HopRewriteUtils.isTransposeOperation(hi)
 			&& hi.getInput().get(0) instanceof BinaryOp                       //basic binary
 			&& ((BinaryOp)hi.getInput().get(0)).supportsMatrixScalarOperations()) 
 		{
@@ -1507,10 +1370,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			
 			//check matrix mult and both inputs transposes w/ single consumer
 			if( left instanceof AggBinaryOp && C.getDataType().isMatrix()
-				&& left.getInput().get(0).getParent().size()==1 && left.getInput().get(0) instanceof ReorgOp
-				&& ((ReorgOp)left.getInput().get(0)).getOp()==ReOrgOp.TRANSPOSE     
-				&& left.getInput().get(1).getParent().size()==1 && left.getInput().get(1) instanceof ReorgOp
-				&& ((ReorgOp)left.getInput().get(1)).getOp()==ReOrgOp.TRANSPOSE )
+				&& HopRewriteUtils.isTransposeOperation(left.getInput().get(0))     
+				&& left.getInput().get(0).getParent().size()==1 
+				&& HopRewriteUtils.isTransposeOperation(left.getInput().get(1))
+				&& left.getInput().get(1).getParent().size()==1 )
 			{
 				Hop A = left.getInput().get(0).getInput().get(0);
 				Hop B = left.getInput().get(1).getInput().get(0);
@@ -1519,8 +1382,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				ReorgOp rop = HopRewriteUtils.createTranspose(C);
 				BinaryOp bop = HopRewriteUtils.createBinary(abop, rop, OpOp2.PLUS);
 				
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, bop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, bop, pos);
 				
 				hi = bop;
 				LOG.debug("Applied simplifyTransposeAggBinBinaryChains (line "+hi.getBeginLine()+").");						
@@ -1550,16 +1412,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				Hop hi3 = hi2.getInput().get(0);
 				//remove unnecessary chain of t(t())
-				HopRewriteUtils.removeChildReference(parent, hi);
-				HopRewriteUtils.addChildReference(parent, hi3, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, hi3, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi, hi2);
 				hi = hi3;
 				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
-				if( hi2.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi2 );
-				
 				LOG.debug("Applied removeUnecessaryReorgOperation.");
 			}
 		}
@@ -1582,16 +1438,10 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			{
 				Hop hi3 = hi2.getInput().get(1);
 				//remove unnecessary chain of -(-())
-				HopRewriteUtils.removeChildReference(parent, hi);
-				HopRewriteUtils.addChildReference(parent, hi3, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, hi3, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi, hi2);
 				hi = hi3;
 				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
-				if( hi2.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi2 );
-				
 				LOG.debug("Applied removeUnecessaryMinus");
 			}
 		}
@@ -1618,8 +1468,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 					Hop th = phi.getInput().get(ix1);
 					Hop gh = phi.getInput().get(ix2);
 					
-					HopRewriteUtils.removeChildReference(hi, th);
-					HopRewriteUtils.addChildReference(hi, gh, ix1);
+					HopRewriteUtils.replaceChildReference(hi, th, gh, ix1);
 					
 					LOG.debug("Applied simplifyGroupedAggregateCount");	
 				}
@@ -1635,29 +1484,25 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		//pattern X - (s * ppred(X,0,!=)) -> X -nz s
 		//note: this is done as a hop rewrite in order to significantly reduce the 
 		//memory estimate for X - tmp if X is sparse 
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==OpOp2.MINUS
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MINUS)
 			&& hi.getInput().get(0).getDataType()==DataType.MATRIX
 			&& hi.getInput().get(1).getDataType()==DataType.MATRIX
-			&& hi.getInput().get(1) instanceof BinaryOp 
-			&& ((BinaryOp)hi.getInput().get(1)).getOp()==OpOp2.MULT )
+			&& HopRewriteUtils.isBinary(hi.getInput().get(1), OpOp2.MULT) )
 		{
 			Hop X = hi.getInput().get(0);
 			Hop s = hi.getInput().get(1).getInput().get(0);
 			Hop pred = hi.getInput().get(1).getInput().get(1);
 			
 			if( s.getDataType()==DataType.SCALAR && pred.getDataType()==DataType.MATRIX
-				&& pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL
+				&& HopRewriteUtils.isBinary(pred, OpOp2.NOTEQUAL)
 				&& pred.getInput().get(0) == X //depend on common subexpression elimination
 				&& pred.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)pred.getInput().get(1))==0 )
 			{
-				Hop hnew = new BinaryOp("tmp", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MINUS_NZ, X, s);
-				HopRewriteUtils.setOutputBlocksizes(hnew, hi.getRowsInBlock(), hi.getColsInBlock());
-				hnew.refreshSizeInformation();
-		
+				Hop hnew = HopRewriteUtils.createBinary(X, s, OpOp2.MINUS_NZ); 
+				
 				//relink new hop into original position
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 				hi = hnew;
 				
 				LOG.debug("Applied fuseMinusNzBinaryOperation (line "+hi.getBeginLine()+")");	
@@ -1673,27 +1518,23 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		//pattern ppred(X,0,"!=")*log(X) -> log_nz(X)
 		//note: this is done as a hop rewrite in order to significantly reduce the 
 		//memory estimate and to prevent dense intermediates if X is ultra sparse  
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==OpOp2.MULT
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MULT)
 			&& hi.getInput().get(0).getDataType()==DataType.MATRIX
 			&& hi.getInput().get(1).getDataType()==DataType.MATRIX
-			&& hi.getInput().get(1) instanceof UnaryOp 
-			&& ((UnaryOp)hi.getInput().get(1)).getOp()==OpOp1.LOG )
+			&& HopRewriteUtils.isUnary(hi.getInput().get(1), OpOp1.LOG) )
 		{
 			Hop pred = hi.getInput().get(0);
 			Hop X = hi.getInput().get(1).getInput().get(0);
 			
-			if(    pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL
+			if( HopRewriteUtils.isBinary(pred, OpOp2.NOTEQUAL)
 				&& pred.getInput().get(0) == X //depend on common subexpression elimination
 				&& pred.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)pred.getInput().get(1))==0 )
 			{
-				Hop hnew = new UnaryOp("tmp", DataType.MATRIX, ValueType.DOUBLE, OpOp1.LOG_NZ, X);
-				HopRewriteUtils.setOutputBlocksizes(hnew, hi.getRowsInBlock(), hi.getColsInBlock());
-				hnew.refreshSizeInformation();
-		
+				Hop hnew = HopRewriteUtils.createUnary(X, OpOp1.LOG_NZ);
+				
 				//relink new hop into original position
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 				hi = hnew;
 				
 				LOG.debug("Applied fuseLogNzUnaryOperation (line "+hi.getBeginLine()+").");	
@@ -1709,28 +1550,24 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		//pattern ppred(X,0,"!=")*log(X,0.5) -> log_nz(X,0.5)
 		//note: this is done as a hop rewrite in order to significantly reduce the 
 		//memory estimate and to prevent dense intermediates if X is ultra sparse  
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==OpOp2.MULT
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MULT)
 			&& hi.getInput().get(0).getDataType()==DataType.MATRIX
 			&& hi.getInput().get(1).getDataType()==DataType.MATRIX
-			&& hi.getInput().get(1) instanceof BinaryOp 
-			&& ((BinaryOp)hi.getInput().get(1)).getOp()==OpOp2.LOG )
+			&& HopRewriteUtils.isBinary(hi.getInput().get(1), OpOp2.LOG) )
 		{
 			Hop pred = hi.getInput().get(0);
 			Hop X = hi.getInput().get(1).getInput().get(0);
 			Hop log = hi.getInput().get(1).getInput().get(1);
 			
-			if(    pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL
+			if( HopRewriteUtils.isBinary(pred, OpOp2.NOTEQUAL)
 				&& pred.getInput().get(0) == X //depend on common subexpression elimination
 				&& pred.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)pred.getInput().get(1))==0 )
 			{
-				Hop hnew = new BinaryOp("tmp", DataType.MATRIX, ValueType.DOUBLE, OpOp2.LOG_NZ, X, log);
-				HopRewriteUtils.setOutputBlocksizes(hnew, hi.getRowsInBlock(), hi.getColsInBlock());
-				hnew.refreshSizeInformation();
-		
+				Hop hnew = HopRewriteUtils.createBinary(X, log, OpOp2.LOG_NZ);
+				
 				//relink new hop into original position
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 				hi = hnew;
 				
 				LOG.debug("Applied fuseLogNzBinaryOperation (line "+hi.getBeginLine()+")");	
@@ -1746,18 +1583,15 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 		//pattern: outer(v, t(seq(1,m)), "==") -> rexpand(v, max=m, dir=row, ignore=true, cast=false)
 		//note: this rewrite supports both left/right sequence 
 		
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).isOuterVectorOperator()
-			&& ((BinaryOp)hi).getOp()==OpOp2.EQUAL )
+		if( HopRewriteUtils.isBinary(hi, OpOp2.EQUAL) && ((BinaryOp)hi).isOuterVectorOperator() )
 		{
-			if(   ( hi.getInput().get(1) instanceof ReorgOp                 //pattern a: outer(v, t(seq(1,m)), "==")
-				    && ((ReorgOp) hi.getInput().get(1)).getOp()==ReOrgOp.TRANSPOSE
+			if(   ( HopRewriteUtils.isTransposeOperation(hi.getInput().get(1)) //pattern a: outer(v, t(seq(1,m)), "==")
 				    && HopRewriteUtils.isBasic1NSequence(hi.getInput().get(1).getInput().get(0))) 
 				|| HopRewriteUtils.isBasic1NSequence(hi.getInput().get(0))) //pattern b: outer(seq(1,m), t(v) "==")
 			{
 				//determine variable parameters for pattern a/b
 				boolean isPatternB = HopRewriteUtils.isBasic1NSequence(hi.getInput().get(0));
-				boolean isTransposeRight = (hi.getInput().get(1) instanceof ReorgOp 
-						&& ((ReorgOp) hi.getInput().get(1)).getOp()==ReOrgOp.TRANSPOSE);				
+				boolean isTransposeRight = HopRewriteUtils.isTransposeOperation(hi.getInput().get(1));				
 				Hop trgt = isPatternB ? (isTransposeRight ? 
 						hi.getInput().get(1).getInput().get(0) :                  //get v from t(v)
 						HopRewriteUtils.createTranspose(hi.getInput().get(1)) ) : //create v via t(v')
@@ -1777,12 +1611,11 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				//create new hop
 				ParameterizedBuiltinOp pbop = new ParameterizedBuiltinOp("tmp", DataType.MATRIX, ValueType.DOUBLE, 
 						ParamBuiltinOp.REXPAND, inputargs);
-				HopRewriteUtils.setOutputBlocksizes(pbop, hi.getRowsInBlock(), hi.getColsInBlock());
+				pbop.setOutputBlocksizes(hi.getRowsInBlock(), hi.getColsInBlock());
 				pbop.refreshSizeInformation();
 		
 				//relink new hop into original position
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, pbop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, pbop, pos);
 				hi = pbop;
 				
 				LOG.debug("Applied simplifyOuterSeqExpand (line "+hi.getBeginLine()+")");	
@@ -1824,12 +1657,11 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 				//create new hop
 				ParameterizedBuiltinOp pbop = new ParameterizedBuiltinOp("tmp", DataType.MATRIX, ValueType.DOUBLE, 
 						ParamBuiltinOp.REXPAND, inputargs);
-				HopRewriteUtils.setOutputBlocksizes(pbop, hi.getRowsInBlock(), hi.getColsInBlock());
+				pbop.setOutputBlocksizes(hi.getRowsInBlock(), hi.getColsInBlock());
 				pbop.refreshSizeInformation();
 		
 				//relink new hop into original position
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, pbop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, pbop, pos);
 				hi = pbop;
 				
 				LOG.debug("Applied simplifyTableSeqExpand (line "+hi.getBeginLine()+")");	
@@ -1869,10 +1701,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			if( left==right && bop.getOp()==OpOp2.NOTEQUAL || bop.getOp()==OpOp2.GREATER || bop.getOp()==OpOp2.LESS )
 				datagen = HopRewriteUtils.createDataGenOp(left, 0);
 					
-			if( datagen != null )
-			{
-				HopRewriteUtils.removeChildReference(parent, hi);
-				HopRewriteUtils.addChildReference(parent, datagen, pos);
+			if( datagen != null ) {
+				HopRewriteUtils.replaceChildReference(parent, hi, datagen, pos);
 				hi = datagen;
 			}
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteConstantFolding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteConstantFolding.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteConstantFolding.java
index 82babd1..0b4faf6 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteConstantFolding.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteConstantFolding.java
@@ -226,11 +226,8 @@ public class RewriteConstantFolding extends HopRewriteRule
 		ec.getVariables().removeAll();
 		
 		//set literal properties (scalar)
- 		literal.setDim1(0);
-		literal.setDim2(0);
-		literal.setRowsInBlock(-1);
-		literal.setColsInBlock(-1);
-		
+		HopRewriteUtils.setOutputParametersForScalar(literal);
+ 		
 		//System.out.println("Constant folded in "+time.stop()+"ms.");
 		
 		return literal;
@@ -278,8 +275,7 @@ public class RewriteConstantFolding extends HopRewriteRule
 		throws HopsException
 	{
 		ArrayList<Hop> in = hop.getInput();
-		return (   hop instanceof BinaryOp 
-				&& ((BinaryOp)hop).getOp()==OpOp2.AND
+		return (   HopRewriteUtils.isBinary(hop, OpOp2.AND)
 				&& ( (in.get(0) instanceof LiteralOp && !((LiteralOp)in.get(0)).getBooleanValue())   
 				   ||(in.get(1) instanceof LiteralOp && !((LiteralOp)in.get(1)).getBooleanValue())) );			
 	}
@@ -288,8 +284,7 @@ public class RewriteConstantFolding extends HopRewriteRule
 		throws HopsException
 	{
 		ArrayList<Hop> in = hop.getInput();
-		return (   hop instanceof BinaryOp 
-				&& ((BinaryOp)hop).getOp()==OpOp2.OR
+		return (   HopRewriteUtils.isBinary(hop, OpOp2.OR)
 				&& ( (in.get(0) instanceof LiteralOp && ((LiteralOp)in.get(0)).getBooleanValue())   
 				   ||(in.get(1) instanceof LiteralOp && ((LiteralOp)in.get(1)).getBooleanValue())) );			
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
index 08a9599..991dedd 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteForLoopVectorization.java
@@ -40,7 +40,6 @@ import org.apache.sysml.parser.IfStatementBlock;
 import org.apache.sysml.parser.StatementBlock;
 import org.apache.sysml.parser.WhileStatementBlock;
 import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
 
 /**
  * Rule: Simplify program structure by pulling if or else statement body out
@@ -181,8 +180,7 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 			AggOp aggOp = MAP_SCALAR_AGGREGATE_TARGET_OPS[aggOpPos];
 			
 			//replace cast with sum
-			AggUnaryOp newSum = new AggUnaryOp(cast.getName(), DataType.SCALAR, ValueType.DOUBLE, 
-					aggOp, Direction.RowCol, ix);
+			AggUnaryOp newSum = HopRewriteUtils.createAggUnaryOp(ix, aggOp, Direction.RowCol);
 			HopRewriteUtils.removeChildReference(cast, ix);
 			HopRewriteUtils.removeChildReference(bop, cast);
 			HopRewriteUtils.addChildReference(bop, newSum, leftScalar?1:0 );
@@ -191,10 +189,8 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 			//NOTE: any redundant index operations are removed via dynamic algebraic simplification rewrites
 			int index1 = rowIx ? 1 : 3;
 			int index2 = rowIx ? 2 : 4;
-			HopRewriteUtils.removeChildReferenceByPos(ix, ix.getInput().get(index1), index1);
-			HopRewriteUtils.addChildReference(ix, from, index1);
-			HopRewriteUtils.removeChildReferenceByPos(ix, ix.getInput().get(index2), index2);
-			HopRewriteUtils.addChildReference(ix, to, index2);
+			HopRewriteUtils.replaceChildReference(ix, ix.getInput().get(index1), from, index1);
+			HopRewriteUtils.replaceChildReference(ix, ix.getInput().get(index2), to, index2);
 			
 			//update indexing size information
 			if( rowIx )
@@ -288,21 +284,13 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 			int index1 = rowIx ? 2 : 4;
 			int index2 = rowIx ? 3 : 5;
 			//modify left indexing bounds
-			HopRewriteUtils.removeChildReferenceByPos(lix, lix.getInput().get(index1), index1 );
-			HopRewriteUtils.addChildReference(lix, from, index1);
-			HopRewriteUtils.removeChildReferenceByPos(lix, lix.getInput().get(index2), index2 );
-			HopRewriteUtils.addChildReference(lix, to, index2);
+			HopRewriteUtils.replaceChildReference(lix, lix.getInput().get(index1),from, index1);
+			HopRewriteUtils.replaceChildReference(lix, lix.getInput().get(index2),to, index2);
 			//modify both right indexing
-			HopRewriteUtils.removeChildReferenceByPos(rix0, rix0.getInput().get(index1-1), index1-1 );
-			HopRewriteUtils.addChildReference(rix0, from, index1-1);
-			HopRewriteUtils.removeChildReferenceByPos(rix0, rix0.getInput().get(index2-1), index2-1 );
-			HopRewriteUtils.addChildReference(rix0, to, index2-1);
-			HopRewriteUtils.removeChildReferenceByPos(rix1, rix1.getInput().get(index1-1), index1-1 );
-			HopRewriteUtils.addChildReference(rix1, from, index1-1);
-			HopRewriteUtils.removeChildReferenceByPos(rix1, rix1.getInput().get(index2-1), index2-1 );
-			HopRewriteUtils.addChildReference(rix1, to, index2-1);
-			rix0.refreshSizeInformation();
-			rix1.refreshSizeInformation();
+			HopRewriteUtils.replaceChildReference(rix0, rix0.getInput().get(index1-1), from, index1-1);
+			HopRewriteUtils.replaceChildReference(rix0, rix0.getInput().get(index2-1), to, index2-1);
+			HopRewriteUtils.replaceChildReference(rix1, rix1.getInput().get(index1-1), from, index1-1);
+			HopRewriteUtils.replaceChildReference(rix1, rix1.getInput().get(index2-1), to, index2-1);
 			bop.refreshSizeInformation();
 			lix.refreshSizeInformation();
 			
@@ -385,16 +373,11 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 			int index1 = rowIx ? 2 : 4;
 			int index2 = rowIx ? 3 : 5;
 			//modify left indexing bounds
-			HopRewriteUtils.removeChildReferenceByPos(lix, lix.getInput().get(index1), index1 );
-			HopRewriteUtils.addChildReference(lix, from, index1);
-			HopRewriteUtils.removeChildReferenceByPos(lix, lix.getInput().get(index2), index2 );
-			HopRewriteUtils.addChildReference(lix, to, index2);
+			HopRewriteUtils.replaceChildReference(lix, lix.getInput().get(index1), from, index1);
+			HopRewriteUtils.replaceChildReference(lix, lix.getInput().get(index2), to, index2);
 			//modify right indexing
-			HopRewriteUtils.removeChildReferenceByPos(rix, rix.getInput().get(index1-1), index1-1 );
-			HopRewriteUtils.addChildReference(rix, from, index1-1);
-			HopRewriteUtils.removeChildReferenceByPos(rix, rix.getInput().get(index2-1), index2-1 );
-			HopRewriteUtils.addChildReference(rix, to, index2-1);
-			rix.refreshSizeInformation();
+			HopRewriteUtils.replaceChildReference(rix, rix.getInput().get(index1-1), from, index1-1);
+			HopRewriteUtils.replaceChildReference(rix, rix.getInput().get(index2-1), to, index2-1);
 			uop.refreshSizeInformation();
 			lix.refreshSizeInformation();
 			
@@ -405,6 +388,4 @@ public class RewriteForLoopVectorization extends StatementBlockRewriteRule
 		
 		return ret;
 	}
-	
-	
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
index c770644..cf5ebce 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteIndexingVectorization.java
@@ -177,10 +177,8 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 					for( Hop c : ihops ) {
 						HopRewriteUtils.removeChildReference(c, input); //input data
 						HopRewriteUtils.addChildReference(c, newRix, 0);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(3),3); //col lower expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 3);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(4),4); //col upper expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 4);
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(3), new LiteralOp(1), 3); //col lower expr
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(4), new LiteralOp(1), 4); //col upper expr 
 						c.refreshSizeInformation();
 					}
 
@@ -239,10 +237,8 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 					//reset row index all candidates and refresh sizes (bottom-up)
 					for( int i=ihops.size()-1; i>=0; i-- ) {
 						Hop c = ihops.get(i);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(2), 2); //row lower expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 2);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(3), 3); //row upper expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 3);
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(2), new LiteralOp(1), 2); //row lower expr
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(3), new LiteralOp(1), 3); //row upper expr
 						((LeftIndexingOp)c).setRowLowerEqualsUpper(true);
 						c.refreshSizeInformation();
 					}
@@ -313,10 +309,8 @@ public class RewriteIndexingVectorization extends HopRewriteRule
 					//reset col index all candidates and refresh sizes (bottom-up)
 					for( int i=ihops.size()-1; i>=0; i-- ) {
 						Hop c = ihops.get(i);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(4), 4); //col lower expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 4);
-						HopRewriteUtils.removeChildReferenceByPos(c, c.getInput().get(5), 5); //col upper expr
-						HopRewriteUtils.addChildReference(c, new LiteralOp(1), 5);
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(4), new LiteralOp(1), 4); //col lower expr
+						HopRewriteUtils.replaceChildReference(c, c.getInput().get(5), new LiteralOp(1), 5); //col upper expr
 						((LeftIndexingOp)c).setColLowerEqualsUpper(true);
 						c.refreshSizeInformation();
 					}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteMatrixMultChainOptimization.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteMatrixMultChainOptimization.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteMatrixMultChainOptimization.java
index bb87fe8..9445fcb 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteMatrixMultChainOptimization.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteMatrixMultChainOptimization.java
@@ -96,7 +96,7 @@ public class RewriteMatrixMultChainOptimization extends HopRewriteRule
 		if(hop.getVisited() == Hop.VisitStatus.DONE)
 				return;
 		
-		if (  hop instanceof AggBinaryOp && ((AggBinaryOp) hop).isMatrixMultiply()
+		if (  HopRewriteUtils.isMatrixMultiply(hop)
 			  && !((AggBinaryOp)hop).hasLeftPMInput() 
 			  && hop.getVisited() != Hop.VisitStatus.DONE ) 
 		{
@@ -159,7 +159,7 @@ public class RewriteMatrixMultChainOptimization extends HopRewriteRule
 			 *    (either within chain or outside the chain)
 			 */
 
-			if (    h instanceof AggBinaryOp && ((AggBinaryOp) h).isMatrixMultiply()
+			if (    HopRewriteUtils.isMatrixMultiply(h)
 			     && !((AggBinaryOp)hop).hasLeftPMInput() 
 				 && h.getVisited() != Hop.VisitStatus.DONE ) 
 			{

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveReadAfterWrite.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveReadAfterWrite.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveReadAfterWrite.java
index 61cc5e7..9348088 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveReadAfterWrite.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveReadAfterWrite.java
@@ -69,11 +69,8 @@ public class RewriteRemoveReadAfterWrite extends HopRewriteRule
 				//rewire read consumers to write input
 				Hop input = writes.get(rfname).getInput().get(0);
 				ArrayList<Hop> parents = (ArrayList<Hop>) rhop.getParent().clone();
-				for( Hop p : parents ) {
-					int pos = HopRewriteUtils.getChildReferencePos(p, rhop);
-					HopRewriteUtils.removeChildReferenceByPos(p, rhop, pos);
-					HopRewriteUtils.addChildReference(p, input, pos);
-				}
+				for( Hop p : parents )
+					HopRewriteUtils.replaceChildReference(p, rhop, input);
 			}
 		}
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveUnnecessaryCasts.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveUnnecessaryCasts.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveUnnecessaryCasts.java
index 0ff154c..95a1214 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveUnnecessaryCasts.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveUnnecessaryCasts.java
@@ -123,11 +123,8 @@ public class RewriteRemoveUnnecessaryCasts extends HopRewriteRule
 				Hop input = uop2.getInput().get(0);
 				//rewire parents
 				ArrayList<Hop> parents = (ArrayList<Hop>) hop.getParent().clone();
-				for( Hop p : parents ) {
-					int ix = HopRewriteUtils.getChildReferencePos(p, hop);
-					HopRewriteUtils.removeChildReference(p, hop);
-					HopRewriteUtils.addChildReference(p, input, ix);
-				}
+				for( Hop p : parents )
+					HopRewriteUtils.replaceChildReference(p, hop, input);
 			}
 		}
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagDataDependentOperators.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagDataDependentOperators.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagDataDependentOperators.java
index 69cabc5..c4e6caa 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagDataDependentOperators.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagDataDependentOperators.java
@@ -37,7 +37,6 @@ import org.apache.sysml.hops.LiteralOp;
 import org.apache.sysml.hops.ParameterizedBuiltinOp;
 import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.TernaryOp;
-import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.recompile.Recompiler;
 import org.apache.sysml.parser.DataIdentifier;
 import org.apache.sysml.parser.StatementBlock;
@@ -132,13 +131,9 @@ public class RewriteSplitDagDataDependentOperators extends StatementBlockRewrite
 						for( int i=0; i<parents.size(); i++ ) {
 							//prevent concurrent modification by index access
 							Hop parent = parents.get(i);
-							if( !candChilds.contains(parent) ) //anomaly filter
-							{
-								if( parent != twrite ) {
-									int pos = HopRewriteUtils.getChildReferencePos(parent, c);
-									HopRewriteUtils.removeChildReferenceByPos(parent, c, pos);
-									HopRewriteUtils.addChildReference(parent, tread, pos);
-								}
+							if( !candChilds.contains(parent) ) { //anomaly filter
+								if( parent != twrite )
+									HopRewriteUtils.replaceChildReference(parent, c, tread);
 								else
 									sb.get_hops().remove(parent);
 							}
@@ -163,11 +158,7 @@ public class RewriteSplitDagDataDependentOperators extends StatementBlockRewrite
 							//prevent concurrent modification by index access
 							Hop parent = parents.get(i);
 							if( !candChilds.contains(parent) ) //anomaly filter
-							{
-								int pos = HopRewriteUtils.getChildReferencePos(parent, c);
-								HopRewriteUtils.removeChildReferenceByPos(parent, c, pos);
-								HopRewriteUtils.addChildReference(parent, tread, pos);
-							}
+								HopRewriteUtils.replaceChildReference(parent, c, tread);
 						}
 						
 						//add data-dependent operator sub dag to first statement block
@@ -258,7 +249,7 @@ public class RewriteSplitDagDataDependentOperators extends StatementBlockRewrite
 			for( Hop p : hop.getParent() ) {
 				//list of operators without need for empty blocks to be extended as needed
 				noEmptyBlocks &= (   p instanceof AggBinaryOp && hop == p.getInput().get(0) 
-				                  || p instanceof UnaryOp && ((UnaryOp)p).getOp()==OpOp1.NROW);
+				                  || HopRewriteUtils.isUnary(p, OpOp1.NROW) );
 				onlyPMM &= (p instanceof AggBinaryOp && hop == p.getInput().get(0));
 			}
 			pbhop.setOutputEmptyBlocks(!noEmptyBlocks);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagUnknownCSVRead.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagUnknownCSVRead.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagUnknownCSVRead.java
index e0a6590..692762e 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagUnknownCSVRead.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteSplitDagUnknownCSVRead.java
@@ -87,9 +87,7 @@ public class RewriteSplitDagUnknownCSVRead extends StatementBlockRewriteRule
 					for( int i=0; i<parents.size(); i++ )
 					{
 						Hop parent = parents.get(i);
-						int pos = HopRewriteUtils.getChildReferencePos(parent, reblock);
-						HopRewriteUtils.removeChildReferenceByPos(parent, reblock, pos);
-						HopRewriteUtils.addChildReference(parent, tread, pos);
+						HopRewriteUtils.replaceChildReference(parent, reblock, tread);
 					}
 					
 					//add reblock sub dag to first statement block

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index e3533b7..0063997 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -2899,9 +2899,9 @@ public class DMLTranslator
 	}
 	
 	private void setBlockSizeAndRefreshSizeInfo(Hop in, Hop out) {
-		HopRewriteUtils.setOutputBlocksizes(out, in.getRowsInBlock(), in.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(in, out);
+		out.setOutputBlocksizes(in.getRowsInBlock(), in.getColsInBlock());
 		out.refreshSizeInformation();
+		HopRewriteUtils.copyLineNumbers(in, out);
 	}
 
 	private ArrayList<Hop> getALHopsForConvOpPoolingCOL2IM(Hop first, BuiltinFunctionExpression source, int skip, HashMap<String, Hop> hops) throws ParseException {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index 66065bf..bbe5bf7 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -2589,7 +2589,7 @@ public class OptimizerRuleBased extends Optimizer
 					ret = true;
 					sharedVars.add(ch.getName());
 				}
-				else if(    ch instanceof ReorgOp && ((ReorgOp)ch).getOp()==ReOrgOp.TRANSPOSE 
+				else if( HopRewriteUtils.isTransposeOperation(ch)  
 					&& ch.getInput().get(0) instanceof DataOp && ch.getInput().get(0).getDataType() == DataType.MATRIX
 					&& inputVars.contains(ch.getInput().get(0).getName()) )
 					//&& !partitionedVars.contains(ch.getInput().get(0).getName()))
@@ -2707,8 +2707,7 @@ public class OptimizerRuleBased extends Optimizer
 				for( Hop in : h.getInput() ) {
 					if( in instanceof DataOp )
 						cand.add( in.getName() );
-					else if( in instanceof ReorgOp 
-						&& ((ReorgOp)in).getOp()==ReOrgOp.TRANSPOSE
+					else if( HopRewriteUtils.isTransposeOperation(in)
 						&& in.getInput().get(0) instanceof DataOp )
 						cand.add( in.getInput().get(0).getName() );
 				}


[6/6] incubator-systemml git commit: [SYSTEMML-1336] Improved parfor optimizer (conditional partitioning)

Posted by mb...@apache.org.
[SYSTEMML-1336] Improved parfor optimizer (conditional partitioning)

This patch improves the parfor optimizer to consider what-if scenarios
with conditional partitioning. This avoids falling back to local parfor
plans with a small degree of parallelism (if the data barely fits in the
driver) in cases where we could instead have applied a fused
partition-execute parfor job.

For example, on perftest 8GB univariate-stats, it improved the
end-to-end runtime (incl. Spark context creation and I/O) from 781s to
110s.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2f7fa8d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2f7fa8d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2f7fa8d7

Branch: refs/heads/master
Commit: 2f7fa8d73fa9680df283444627209a31c5ef4acd
Parents: 35da413
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Feb 23 22:19:24 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 24 12:27:29 2017 -0800

----------------------------------------------------------------------
 .../parfor/opt/CostEstimator.java               | 26 +++++--
 .../controlprogram/parfor/opt/OptNode.java      |  2 +
 .../parfor/opt/OptimizerConstrained.java        | 37 ++++++----
 .../parfor/opt/OptimizerRuleBased.java          | 75 +++++++++++---------
 4 files changed, 90 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7fa8d7/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
index bb3ca88..3fdf8bd 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
@@ -55,6 +55,8 @@ public abstract class CostEstimator
 		SPARSE
 	}
 	
+	protected boolean _inclCondPart = false;
+	
 	/**
 	 * Main leaf node estimation method - to be overwritten by specific cost estimators
 	 * 
@@ -88,6 +90,7 @@ public abstract class CostEstimator
 	 * 
 	 * @param measure ?
 	 * @param node internal representation of a plan alternative for program blocks and instructions
+	 * @param inclCondPart including conditional partitioning
 	 * @return estimate?
 	 * @throws DMLRuntimeException if DMLRuntimeException occurs
 	 */
@@ -97,13 +100,26 @@ public abstract class CostEstimator
 		return getEstimate(measure, node, null);
 	}
 	
+	public double getEstimate( TestMeasure measure, OptNode node, boolean inclCondPart ) 
+		throws DMLRuntimeException
+	{
+		//temporarily change local flag and get estimate
+		boolean oldInclCondPart = _inclCondPart;
+		_inclCondPart = inclCondPart; 
+		double val = getEstimate(measure, node, null);
+		
+		//reset local flag and return
+		_inclCondPart = oldInclCondPart;
+		return val;
+	}
+	
 	/**
 	 * Main estimation method.
 	 * 
-	 * @param measure ?
-	 * @param node internal representation of a plan alternative for program blocks and instructions
+	 * @param measure estimate type (time or memory)
+	 * @param node plan opt tree node
 	 * @param et execution type
-	 * @return estimate?
+	 * @return estimate
 	 * @throws DMLRuntimeException if DMLRuntimeException occurs
 	 */
 	public double getEstimate( TestMeasure measure, OptNode node, ExecType et ) 
@@ -113,7 +129,9 @@ public abstract class CostEstimator
 		
 		if( node.isLeaf() )
 		{
-			if( et != null )
+			if( _inclCondPart && node.getParam(ParamType.DATA_PARTITION_COND_MEM) != null )
+				val = Double.parseDouble(node.getParam(ParamType.DATA_PARTITION_COND_MEM));
+			else if( et != null )
 				val = getLeafNodeEstimate(measure, node, et); //forced type
 			else 
 				val = getLeafNodeEstimate(measure, node); //default	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7fa8d7/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
index 7968c6a..26c30d4 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
@@ -71,6 +71,8 @@ public class OptNode
 		TASK_SIZE,
 		DATA_PARTITIONER,
 		DATA_PARTITION_FORMAT,
+		DATA_PARTITION_COND,
+		DATA_PARTITION_COND_MEM,
 		RESULT_MERGE,
 		NUM_ITERATIONS,
 		RECURSIVE_CALL

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7fa8d7/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
index 39e742f..6edcec3 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
@@ -80,7 +80,6 @@ public class OptimizerConstrained extends OptimizerRuleBased
 		LOG.debug("--- "+getOptMode()+" OPTIMIZER -------");
 
 		OptNode pn = plan.getRoot();
-		double M0 = -1, M1 = -1, M2 = -1; //memory consumption
 
 		//early abort for empty parfor body 
 		if( pn.isLeaf() )
@@ -100,35 +99,45 @@ public class OptimizerConstrained extends OptimizerRuleBased
 		ExecType oldET = pn.getExecType();
 		int oldK = pn.getK();
 		pn.setSerialParFor(); //for basic mem consumption 
-		M0 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
+		double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
 		pn.setExecType(oldET);
 		pn.setK(oldK);
-		LOG.debug(getOptMode()+" OPT: estimated mem (serial exec) M="+toMB(M0) );
+		LOG.debug(getOptMode()+" OPT: estimated mem (serial exec) M="+toMB(M0a) );
 
 		//OPTIMIZE PARFOR PLAN
 
 		// rewrite 1: data partitioning (incl. log. recompile RIX)
 		HashMap<String, PDataPartitionFormat> partitionedMatrices = new HashMap<String,PDataPartitionFormat>();
-		rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices );
-		M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
+		rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget() );
+		double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
 
 		// rewrite 2: remove unnecessary compare matrix
 		rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
 
 		// rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX) 
-		boolean flagLIX = super.rewriteSetResultPartitioning( pn, M1, ec.getVariables() );
-		M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 
-		M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
+		boolean flagLIX = super.rewriteSetResultPartitioning( pn, M0b, ec.getVariables() );
+		double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 
 		LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec) M="+toMB(M1) );
+		
+		//determine memory consumption for what-if: all-cp or partitioned
+		double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
 		LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec, all CP) M="+toMB(M2) );
-
+		double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
+		LOG.debug(getOptMode()+" OPT: estimated new mem (cond partitioning) M="+toMB(M3) );
+		
 		// rewrite 4: execution strategy
 		PExecMode tmpmode = getPExecMode(pn); //keep old
-		boolean flagRecompMR = rewriteSetExecutionStategy( pn, M0, M1, M2, flagLIX );
+		boolean flagRecompMR = rewriteSetExecutionStategy( pn, M0a, M1, M2, M3, flagLIX );
 
 		//exec-type-specific rewrites
 		if( pn.getExecType() == ExecType.MR || pn.getExecType() == ExecType.SPARK )
 		{
+			if( M1 > _rm && M3 <= _rm  ) {
+				// rewrite 1: data partitioning (apply conditional partitioning)
+				rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3 );
+				M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 		
+			}
+			
 			if( flagRecompMR ){
 				//rewrite 5: set operations exec type
 				rewriteSetOperationsExecType( pn, flagRecompMR );
@@ -221,7 +230,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 	///
 
 	@Override
-	protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String,PDataPartitionFormat> partitionedMatrices)
+	protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String,PDataPartitionFormat> partitionedMatrices, double thetaM)
 		throws DMLRuntimeException
 	{
 		boolean blockwise = false;
@@ -235,7 +244,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 			LOG.debug(getOptMode()+" OPT: forced 'set data partitioner' - result="+n.getParam(ParamType.DATA_PARTITIONER) );
 		}
 		else
-			super.rewriteSetDataPartitioner(n, vars, partitionedMatrices);
+			super.rewriteSetDataPartitioner(n, vars, partitionedMatrices, thetaM);
 
 		return blockwise;
 	}
@@ -246,7 +255,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 	///
 
 	@Override
-	protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, boolean flagLIX)
+	protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX)
 		throws DMLRuntimeException
 	{
 		boolean ret = false;
@@ -270,7 +279,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 			LOG.debug(getOptMode()+" OPT: forced 'set execution strategy' - result="+mode );
 		}
 		else
-			ret = super.rewriteSetExecutionStategy(n, M0, M, M2, flagLIX);
+			ret = super.rewriteSetExecutionStategy(n, M0, M, M2, M3, flagLIX);
 
 		return ret;
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7fa8d7/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index bbe5bf7..87cabaa 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -216,7 +216,6 @@ public class OptimizerRuleBased extends Optimizer
 		LOG.debug("--- "+getOptMode()+" OPTIMIZER -------");
 
 		OptNode pn = plan.getRoot();
-		double M0 = -1, M1 = -1, M2 = -1; //memory consumption
 		
 		//early abort for empty parfor body 
 		if( pn.isLeaf() )
@@ -234,32 +233,42 @@ public class OptimizerRuleBased extends Optimizer
 		
 		//ESTIMATE memory consumption 
 		pn.setSerialParFor(); //for basic mem consumption 
-		M0 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
-		LOG.debug(getOptMode()+" OPT: estimated mem (serial exec) M="+toMB(M0) );
+		double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
+		LOG.debug(getOptMode()+" OPT: estimated mem (serial exec) M="+toMB(M0a) );
 		
 		//OPTIMIZE PARFOR PLAN
 		
-		// rewrite 1: data partitioning (incl. log. recompile RIX)
+		// rewrite 1: data partitioning (incl. log. recompile RIX and flag opt nodes)
 		HashMap<String, PDataPartitionFormat> partitionedMatrices = new HashMap<String,PDataPartitionFormat>();
-		rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices );
-		M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
+		rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget() );
+		double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
 		
 		// rewrite 2: remove unnecessary compare matrix (before result partitioning)
 		rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
 		
 		// rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX) 
-		boolean flagLIX = rewriteSetResultPartitioning( pn, M1, ec.getVariables() );
-		M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 
-		M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
+		boolean flagLIX = rewriteSetResultPartitioning( pn, M0b, ec.getVariables() );
+		double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 
 		LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec) M="+toMB(M1) );
+		
+		//determine memory consumption for what-if: all-cp or partitioned 
+		double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
 		LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec, all CP) M="+toMB(M2) );
+		double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
+		LOG.debug(getOptMode()+" OPT: estimated new mem (cond partitioning) M="+toMB(M3) );
 		
 		// rewrite 4: execution strategy
-		boolean flagRecompMR = rewriteSetExecutionStategy( pn, M0, M1, M2, flagLIX );
+		boolean flagRecompMR = rewriteSetExecutionStategy( pn, M0a, M1, M2, M3, flagLIX );
 		
 		//exec-type-specific rewrites
 		if( pn.getExecType() == ExecType.MR || pn.getExecType()==ExecType.SPARK )
 		{
+			if( M1 > _rm && M3 <= _rm  ) {
+				// rewrite 1: data partitioning (apply conditional partitioning)
+				rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3 );
+				M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 		
+			}
+			
 			if( flagRecompMR ){
 				//rewrite 5: set operations exec type
 				rewriteSetOperationsExecType( pn, flagRecompMR );
@@ -390,7 +399,7 @@ public class OptimizerRuleBased extends Optimizer
 	//REWRITE set data partitioner
 	///
 
-	protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PDataPartitionFormat> partitionedMatrices ) 
+	protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PDataPartitionFormat> partitionedMatrices, double thetaM ) 
 		throws DMLRuntimeException
 	{
 		if( n.getNodeType() != NodeType.PARFOR )
@@ -414,16 +423,15 @@ public class OptimizerRuleBased extends Optimizer
 			for( String c : cand )
 			{
 				PDataPartitionFormat dpf = pfsb.determineDataPartitionFormat( c );
-				//System.out.println("Partitioning Format: "+dpf);
+				
 				if( dpf != PDataPartitionFormat.NONE 
-					&& dpf != PDataPartitionFormat.BLOCK_WISE_M_N ) //FIXME
+					&& dpf != PDataPartitionFormat.BLOCK_WISE_M_N ) 
 				{
 					cand2.put( c, dpf );
-				}
-					
+				}	
 			}
 			
-			apply = rFindDataPartitioningCandidates(n, cand2, vars);
+			apply = rFindDataPartitioningCandidates(n, cand2, vars, thetaM);
 			if( apply )
 				partitionedMatrices.putAll(cand2);
 		}
@@ -447,7 +455,7 @@ public class OptimizerRuleBased extends Optimizer
 		return blockwise;
 	}
 
-	protected boolean rFindDataPartitioningCandidates( OptNode n, HashMap<String, PDataPartitionFormat> cand, LocalVariableMap vars ) 
+	protected boolean rFindDataPartitioningCandidates( OptNode n, HashMap<String, PDataPartitionFormat> cand, LocalVariableMap vars, double thetaM ) 
 		throws DMLRuntimeException
 	{
 		boolean ret = false;
@@ -456,7 +464,7 @@ public class OptimizerRuleBased extends Optimizer
 		{
 			for( OptNode cn : n.getChilds() )
 				if( cn.getNodeType() != NodeType.FUNCCALL ) //prevent conflicts with aliases
-					ret |= rFindDataPartitioningCandidates( cn, cand, vars );
+					ret |= rFindDataPartitioningCandidates( cn, cand, vars, thetaM );
 		}
 		else if( n.getNodeType()== NodeType.HOP
 			     && n.getParam(ParamType.OPSTRING).equals(IndexingOp.OPSTRING) )
@@ -470,20 +478,22 @@ public class OptimizerRuleBased extends Optimizer
 				//NOTE: for the moment, we do not partition according to the remote mem, because we can execute 
 				//it even without partitioning in CP. However, advanced optimizers should reason about this 					   
 				//double mold = h.getMemEstimate();
-				if(	   n.getExecType() == ExecType.MR ||  n.getExecType()==ExecType.SPARK ) //Opt Condition: MR/Spark
-				   // || (mold > _rm && mnew <= _rm)   ) //Opt Condition: non-MR special cases (for remote exec)
+				if(	   n.getExecType() == ExecType.MR ||  n.getExecType()==ExecType.SPARK  //Opt Condition: MR/Spark
+					|| h.getMemEstimate() > thetaM ) //Opt Condition: mem estimate > constraint to force partitioning	
 				{
 					//NOTE: subsequent rewrites will still use the MR mem estimate
 					//(guarded by subsequent operations that have at least the memory req of one partition)
-					//if( mnew < _lm ) //apply rewrite if partitions fit into memory
-					//	n.setExecType(ExecType.CP);
-					//else
-					//	n.setExecType(ExecType.CP); //CP_FILE, but hop still in MR 
-					n.setExecType(ExecType.CP);
+					n.setExecType(ExecType.CP); //partition ref only (see below)
 					n.addParam(ParamType.DATA_PARTITION_FORMAT, dpf.toString());
 					h.setMemEstimate( mnew ); //CP vs CP_FILE in ProgramRecompiler bases on mem_estimate
 					ret = true;
 				}
+				//keep track of nodes that allow conditional data partitioning and their mem
+				else
+				{
+					n.addParam(ParamType.DATA_PARTITION_COND, String.valueOf(true));
+					n.addParam(ParamType.DATA_PARTITION_COND_MEM, String.valueOf(mnew));
+				}
 			}
 		}
 		
@@ -803,7 +813,7 @@ public class OptimizerRuleBased extends Optimizer
 	//REWRITE set execution strategy
 	///
 
-	protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, boolean flagLIX) 
+	protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) 
 		throws DMLRuntimeException
 	{
 		boolean isCPOnly = n.isCPOnly();
@@ -814,26 +824,27 @@ public class OptimizerRuleBased extends Optimizer
 		PDataPartitioner REMOTE_DP = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
 
 		//deciding on the execution strategy
-		if( ConfigurationManager.isParallelParFor()            //allowed remote parfor execution
-			&& ( (isCPOnly && M <= _rm )    //Required: all instruction can be be executed in CP
-			   ||(isCPOnlyPossible && M2 <= _rm)) )  //Required: cp inst fit into remote JVM mem 
+		if( ConfigurationManager.isParallelParFor()  //allowed remote parfor execution
+			&& ( (isCPOnly && M <= _rm )             //Required: all inst already in cp and fit in remote mem
+			   ||(isCPOnly && M3 <= _rm ) 	         //Required: all inst already in cp and fit partitioned in remote mem
+			   ||(isCPOnlyPossible && M2 <= _rm)) )  //Required: all inst forced to cp fit in remote mem
 		{
 			//at this point all required conditions for REMOTE_MR given, now its an opt decision
 			int cpk = (int) Math.min( _lk, Math.floor( _lm / M ) ); //estimated local exploited par  
 			
 			//MR if local par cannot be exploited due to mem constraints (this implies that we work on large data)
 			//(the factor of 2 is to account for hyper-threading and in order prevent too eager remote parfor)
-			if( 2*cpk < _lk && 2*cpk < _N && 2*cpk < _rk )
+			if( 2*cpk < _lk && 2*cpk < _N && 2*cpk < _rk ) //incl conditional partitioning
 			{
 				n.setExecType( REMOTE ); //remote parfor
 			}
 			//MR if problem is large enough and remote parallelism is larger than local   
-			else if( _lk < _N && _lk < _rk && isLargeProblem(n, M0) )
+			else if( _lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0) )
 			{
 				n.setExecType( REMOTE ); //remote parfor
 			}
 			//MR if MR operations in local, but CP only in remote (less overall MR jobs)
-			else if( (!isCPOnly) && isCPOnlyPossible )
+			else if( !isCPOnly && isCPOnlyPossible )
 			{
 				n.setExecType( REMOTE ); //remote parfor
 			}


[2/6] incubator-systemml git commit: [SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

Posted by mb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
index 6ffcbd5..cc67cc1 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
@@ -24,7 +24,6 @@ import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
@@ -204,12 +203,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			    HopRewriteUtils.isDimsKnown(hi)) //output dims known
 			{
 				//remove unnecessary right indexing
-				HopRewriteUtils.removeChildReference(parent, hi);
-				
 				Hop hnew = HopRewriteUtils.createDataGenOpByVal( new LiteralOp(hi.getDim1()), 
 						                                         new LiteralOp(hi.getDim2()), 0);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 				hi = hnew;
 				
 				LOG.debug("Applied removeEmptyRightIndexing");
@@ -232,9 +228,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				//handling if out of range indexing)
 				
 				//remove unnecessary right indexing
-				HopRewriteUtils.removeChildReference(parent, hi);
-				HopRewriteUtils.addChildReference(parent, input, pos);
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
 				hi = input;
 				
 				LOG.debug("Applied removeUnnecessaryRightIndexing");
@@ -255,11 +249,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if(   input1.getNnz()==0 //nnz original known and empty
 			   && input2.getNnz()==0  ) //nnz input known and empty
 			{
-				//remove unnecessary right indexing
-				HopRewriteUtils.removeChildReference(parent, hi);		
+				//remove unnecessary right indexing		
 				Hop hnew = HopRewriteUtils.createDataGenOp( input1, 0);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 				hi = hnew;
 				
 				LOG.debug("Applied removeEmptyLeftIndexing");
@@ -279,10 +271,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			{
 				//equal dims of left indexing input and output -> no need for indexing
 				
-				//remove unnecessary right indexing
-				HopRewriteUtils.removeChildReference(parent, hi);				
-				HopRewriteUtils.addChildReference(parent, input, pos);
-				parent.refreshSizeInformation();
+				//remove unnecessary right indexing				
+				HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
 				hi = input;
 				
 				LOG.debug("Applied removeUnnecessaryLeftIndexing");
@@ -314,9 +304,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				&& input1.getDataType()!=DataType.SCALAR && input2.getDataType()!=DataType.SCALAR )
 			{
 				//create new cbind operation and rewrite inputs
-				HopRewriteUtils.removeChildReference(parent, hi);		
 				BinaryOp bop = HopRewriteUtils.createBinary(input1, input2, OpOp2.CBIND);
-				HopRewriteUtils.addChildReference(parent, bop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, bop, pos);
 				
 				hi = bop;
 				applied = true;
@@ -341,9 +330,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				&& input1.getDataType()!=DataType.SCALAR && input2.getDataType()!=DataType.SCALAR )
 			{
 				//create new cbind operation and rewrite inputs
-				HopRewriteUtils.removeChildReference(parent, hi);		
 				BinaryOp bop = HopRewriteUtils.createBinary(input1, input2, OpOp2.RBIND);
-				HopRewriteUtils.addChildReference(parent, bop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, bop, pos);
 				
 				hi = bop;
 				applied = true;
@@ -366,10 +354,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			{
 				OpOp1 op = ((UnaryOp)hi).getOp();
 				
-				//remove unnecessary unary cumsum operator
-				HopRewriteUtils.removeChildReference(parent, hi);				
-				HopRewriteUtils.addChildReference(parent, input, pos);
-				parent.refreshSizeInformation();
+				//remove unnecessary unary cumsum operator				
+				HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
 				hi = input;
 				
 				LOG.debug("Applied removeUnnecessaryCumulativeOp: "+op);
@@ -396,9 +382,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					&& rop.getDim1()==1 && rop.getDim2()==1);
 			
 			if( apply ) {
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);				
-				HopRewriteUtils.addChildReference(parent, input, pos);
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
 				hi = input;
 				LOG.debug("Applied removeUnnecessaryReorg.");
 			}			
@@ -414,7 +398,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			Hop right = hi.getInput().get(1);
 			
 			//check for column replication
-			if(    right instanceof AggBinaryOp //matrix mult with datagen
+			if(    HopRewriteUtils.isMatrixMultiply(right) //matrix mult with datagen
 				&& right.getInput().get(1) instanceof DataGenOp 
 				&& ((DataGenOp)right.getInput().get(1)).getOp()==DataGenMethod.RAND
 				&& ((DataGenOp)right.getInput().get(1)).hasConstantValue(1d)
@@ -422,31 +406,21 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				&& right.getInput().get(0).getDim2() == 1 ) //column vector for mv binary
 			{
 				//remove unnecessary outer product
-				HopRewriteUtils.removeChildReference(hi, right);				
-				HopRewriteUtils.addChildReference(hi, right.getInput().get(0) );
-				hi.refreshSizeInformation();
-				
-				//cleanup refs to matrix mult if no remaining consumers
-				if( right.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( right );
+				HopRewriteUtils.replaceChildReference(hi, right, right.getInput().get(0), 1 );
+				HopRewriteUtils.cleanupUnreferenced(right);
 				
 				LOG.debug("Applied removeUnnecessaryOuterProduct1 (line "+right.getBeginLine()+")");
 			}
 			//check for row replication
-			else if(    right instanceof AggBinaryOp //matrix mult with datagen
+			else if( HopRewriteUtils.isMatrixMultiply(right) //matrix mult with datagen
 				&& right.getInput().get(0) instanceof DataGenOp 
 				&& ((DataGenOp)right.getInput().get(0)).hasConstantValue(1d)
 				&& right.getInput().get(0).getDim2() == 1 //colunm vector for replication
 				&& right.getInput().get(1).getDim1() == 1 ) //row vector for mv binary
 			{
 				//remove unnecessary outer product
-				HopRewriteUtils.removeChildReference(hi, right);				
-				HopRewriteUtils.addChildReference(hi, right.getInput().get(1) );
-				hi.refreshSizeInformation();
-				
-				//cleanup refs to matrix mult if no remaining consumers
-				if( right.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( right );
+				HopRewriteUtils.replaceChildReference(hi, right, right.getInput().get(1), 1 );
+				HopRewriteUtils.cleanupUnreferenced(right);
 				
 				LOG.debug("Applied removeUnnecessaryOuterProduct2 (line "+right.getBeginLine()+")");
 			}
@@ -458,7 +432,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	@SuppressWarnings("unchecked")
 	private Hop fuseDatagenAndReorgOperation(Hop parent, Hop hi, int pos)
 	{
-		if( hi instanceof ReorgOp && ((ReorgOp)hi).getOp()==ReOrgOp.TRANSPOSE  //transpose
+		if( HopRewriteUtils.isTransposeOperation(hi)
 			&& hi.getInput().get(0) instanceof DataGenOp                       //datagen
 			&& hi.getInput().get(0).getParent().size()==1 )                    //transpose only consumer
 		{
@@ -512,17 +486,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 							// the column variances will each be zero.
 							// Therefore, perform a rewrite from COLVAR(X) to a row vector of zeros.
 							Hop emptyRow = HopRewriteUtils.createDataGenOp(uhi, input, 0);
-							HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-							HopRewriteUtils.addChildReference(parent, emptyRow, pos);
-							parent.refreshSizeInformation();
-
-							// cleanup
-							if (hi.getParent().isEmpty())
-								HopRewriteUtils.removeAllChildReferences(hi);
-							if (input.getParent().isEmpty())
-								HopRewriteUtils.removeAllChildReferences(input);
-
-							// replace current HOP with new empty row HOP
+							HopRewriteUtils.replaceChildReference(parent, hi, emptyRow, pos);
+							HopRewriteUtils.cleanupUnreferenced(hi, input);
 							hi = emptyRow;
 
 							LOG.debug("Applied simplifyColwiseAggregate for colVars");
@@ -530,13 +495,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 							// All other valid column aggregations over a row vector will result
 							// in the row vector itself.
 							// Therefore, remove unnecessary col aggregation for 1 row.
-							HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-							if( hi.getParent().isEmpty() ) //no remaining consumers
-								HopRewriteUtils.removeChildReference(hi, input);
-							HopRewriteUtils.addChildReference(parent, input, pos);
-							parent.refreshSizeInformation();
-
-							// replace current HOP with input HOP
+							HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
+							HopRewriteUtils.cleanupUnreferenced(hi);
 							hi = input;
 
 							LOG.debug("Applied simplifyColwiseAggregate1");
@@ -552,15 +512,12 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 						uhi.setDataType(DataType.SCALAR);
 						
 						//create cast to keep same output datatype
-						UnaryOp cast = new UnaryOp(uhi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
-				                   OpOp1.CAST_AS_MATRIX, uhi);
-						HopRewriteUtils.setOutputParameters(cast, 1, 1, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), -1);
+						UnaryOp cast = HopRewriteUtils.createUnary(uhi, OpOp1.CAST_AS_MATRIX);
 						
 						//rehang cast under all parents
 						for( Hop p : parents ) {
 							int ix = HopRewriteUtils.getChildReferencePos(p, hi);
-							HopRewriteUtils.removeChildReference(p, hi);
-							HopRewriteUtils.addChildReference(p, cast, ix);
+							HopRewriteUtils.replaceChildReference(p, hi, cast, ix);
 						}
 						
 						hi = cast;
@@ -594,15 +551,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 							// Therefore, perform a rewrite from ROWVAR(X) to a column vector of
 							// zeros.
 							Hop emptyCol = HopRewriteUtils.createDataGenOp(input, uhi, 0);
-							HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-							HopRewriteUtils.addChildReference(parent, emptyCol, pos);
-							parent.refreshSizeInformation();
-
-							// cleanup
-							if (hi.getParent().isEmpty())
-								HopRewriteUtils.removeAllChildReferences(hi);
-							if (input.getParent().isEmpty())
-								HopRewriteUtils.removeAllChildReferences(input);
+							HopRewriteUtils.replaceChildReference(parent, hi, emptyCol, pos);
+							HopRewriteUtils.cleanupUnreferenced(hi, input);
 
 							// replace current HOP with new empty column HOP
 							hi = emptyCol;
@@ -612,13 +562,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 							// All other valid row aggregations over a column vector will result
 							// in the column vector itself.
 							// Therefore, remove unnecessary row aggregation for 1 col
-							HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-							if( hi.getParent().isEmpty() ) //no remaining consumers
-								HopRewriteUtils.removeChildReference(hi, input);
-							HopRewriteUtils.addChildReference(parent, input, pos);
-							parent.refreshSizeInformation();
-
-							// replace current HOP with input HOP
+							HopRewriteUtils.replaceChildReference(parent, hi, input, pos);
+							HopRewriteUtils.cleanupUnreferenced(hi);
 							hi = input;
 
 							LOG.debug("Applied simplifyRowwiseAggregate1");
@@ -634,15 +579,12 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 						uhi.setDataType(DataType.SCALAR);
 						
 						//create cast to keep same output datatype
-						UnaryOp cast = new UnaryOp(uhi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
-				                   OpOp1.CAST_AS_MATRIX, uhi);
-						HopRewriteUtils.setOutputParameters(cast, 1, 1, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), -1);
+						UnaryOp cast = HopRewriteUtils.createUnary(uhi, OpOp1.CAST_AS_MATRIX);
 						
 						//rehang cast under all parents
 						for( Hop p : parents ) {
 							int ix = HopRewriteUtils.getChildReferencePos(p, hi);
-							HopRewriteUtils.removeChildReference(p, hi);
-							HopRewriteUtils.addChildReference(p, cast, ix);
+							HopRewriteUtils.replaceChildReference(p, hi, cast, ix);
 						}
 						
 						hi = cast;
@@ -666,36 +608,26 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			AggUnaryOp uhi = (AggUnaryOp)hi;
 			Hop input = uhi.getInput().get(0);
 			
-			if( uhi.getOp() == AggOp.SUM && uhi.getDirection() == Direction.Col  ) //colsums
+			if( uhi.getOp() == AggOp.SUM && uhi.getDirection() == Direction.Col //colsums
+			    && HopRewriteUtils.isBinary(input, OpOp2.MULT) ) //b(*) 
 			{
-				if( input instanceof BinaryOp && ((BinaryOp)input).getOp()==OpOp2.MULT ) //b(*) 
+				Hop left = input.getInput().get(0);
+				Hop right = input.getInput().get(1);
+				
+				if(    left.getDim1()>1 && left.getDim2()>1 
+					&& right.getDim1()>1 && right.getDim2()==1 ) // MV (col vector)
 				{
-					Hop left = input.getInput().get(0);
-					Hop right = input.getInput().get(1);
+					//create new operators 
+					ReorgOp trans = HopRewriteUtils.createTranspose(right);
+					AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(trans, left);
 					
-					if(    left.getDim1()>1 && left.getDim2()>1 
-						&& right.getDim1()>1 && right.getDim2()==1 ) // MV (col vector)
-					{
-						//remove link parent to rowsums
-						HopRewriteUtils.removeChildReference(parent, hi);
-						
-						//create new operators 
-						ReorgOp trans = HopRewriteUtils.createTranspose(right);
-						AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(trans, left);
-						
-						//relink new child
-						HopRewriteUtils.addChildReference(parent, mmult, pos);
-						hi = mmult;
-						
-						//cleanup old dag
-						if( uhi.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(uhi);
-						if( input.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(input);
-						
-						LOG.debug("Applied simplifyColSumsMVMult");
-					}
-				}	
+					//relink new child
+					HopRewriteUtils.replaceChildReference(parent, hi, mmult, pos);
+					HopRewriteUtils.cleanupUnreferenced(uhi, input);
+					hi = mmult;
+					
+					LOG.debug("Applied simplifyColSumsMVMult");
+				}
 			}
 		}
 		
@@ -712,37 +644,27 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			AggUnaryOp uhi = (AggUnaryOp)hi;
 			Hop input = uhi.getInput().get(0);
 			
-			if( uhi.getOp() == AggOp.SUM && uhi.getDirection() == Direction.Row  ) //rowsums
+			if( uhi.getOp() == AggOp.SUM && uhi.getDirection() == Direction.Row //rowsums
+				&& HopRewriteUtils.isBinary(input, OpOp2.MULT) ) //b(*) 
 			{
-				if( input instanceof BinaryOp && ((BinaryOp)input).getOp()==OpOp2.MULT ) //b(*) 
+				Hop left = input.getInput().get(0);
+				Hop right = input.getInput().get(1);
+				
+				if(    left.getDim1()>1 && left.getDim2()>1      
+					&& right.getDim1()==1 && right.getDim2()>1 ) // MV (row vector)
 				{
-					Hop left = input.getInput().get(0);
-					Hop right = input.getInput().get(1);
+					//create new operators 
+					ReorgOp trans = HopRewriteUtils.createTranspose(right);
+					AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(left, trans);
 					
-					if(    left.getDim1()>1 && left.getDim2()>1      
-						&& right.getDim1()==1 && right.getDim2()>1 ) // MV (row vector)
-					{
-						//remove link parent to rowsums
-						HopRewriteUtils.removeChildReference(parent, hi);
-						
-						//create new operators 
-						ReorgOp trans = HopRewriteUtils.createTranspose(right);
-						AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(left, trans);
-						
-						//relink new child
-						HopRewriteUtils.addChildReference(parent, mmult, pos);
-						hi = mmult;
-						
-						//cleanup old dag
-						if( uhi.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(uhi);
-						if( input.getParent().isEmpty() )
-							HopRewriteUtils.removeAllChildReferences(input);
-						
-						LOG.debug("Applied simplifyRowSumsMVMult");
-					}
-				}	
-			}
+					//relink new child
+					HopRewriteUtils.replaceChildReference(parent, hi, mmult, pos);
+					HopRewriteUtils.cleanupUnreferenced(hi, input);
+					hi = mmult;
+					
+					LOG.debug("Applied simplifyRowSumsMVMult");
+				}
+			}	
 		}
 		
 		return hi;
@@ -764,9 +686,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					UnaryOp cast = HopRewriteUtils.createUnary(input, OpOp1.CAST_AS_SCALAR);
 					
 					//remove unnecessary aggregation 
-					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-					HopRewriteUtils.addChildReference(parent, cast, pos);
-					parent.refreshSizeInformation();
+					HopRewriteUtils.replaceChildReference(parent, hi, cast, pos);
 					hi = cast;
 					
 					LOG.debug("Applied simplifyUnncessaryAggregate");
@@ -789,9 +709,6 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				if( HopRewriteUtils.isEmpty(input) )
 				{
-					//remove unnecessary aggregation 
-					HopRewriteUtils.removeChildReference(parent, hi);
-				
 					Hop hnew = null;
 					if( uhi.getDirection() == Direction.RowCol ) 
 						hnew = new LiteralOp(0.0);
@@ -801,8 +718,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 						hnew = HopRewriteUtils.createDataGenOp(input, uhi, 0); //ncol(uhi)=1
 					
 					//add new child to parent input
-					HopRewriteUtils.addChildReference(parent, hnew, pos);
-					parent.refreshSizeInformation();
+					HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 					hi = hnew;
 					
 					LOG.debug("Applied simplifyEmptyAggregate");
@@ -825,14 +741,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				if( HopRewriteUtils.isEmpty(input) )
 				{
-					//remove unnecessary aggregation 
-					HopRewriteUtils.removeChildReference(parent, hi);
-					
 					//create literal add it to parent
 					Hop hnew = HopRewriteUtils.createDataGenOp(input, 0);
-					HopRewriteUtils.addChildReference(parent, hnew, pos);
-					parent.refreshSizeInformation();
-					
+					HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 					hi = hnew;
 					
 					LOG.debug("Applied simplifyEmptyUnaryOperation");
@@ -873,9 +784,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 				//modify dag if one of the above rules applied
 				if( hnew != null ){ 
-					HopRewriteUtils.removeChildReference(parent, hi);
-					HopRewriteUtils.addChildReference(parent, hnew, pos);
-					parent.refreshSizeInformation();
+					HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 					hi = hnew;
 					
 					LOG.debug("Applied simplifyEmptyReorgOperation");
@@ -914,9 +823,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 								
 				//modify dag if one of the above rules applied
 				if( hnew != null ){ 
-					HopRewriteUtils.removeChildReference(parent, hi);
-					HopRewriteUtils.addChildReference(parent, hnew, pos);
-					parent.refreshSizeInformation();
+					HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 					hi = hnew;
 					
 					LOG.debug("Applied simplifyEmptySortOperation (indexreturn="+ixret+").");
@@ -931,7 +838,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	private Hop simplifyEmptyMatrixMult(Hop parent, Hop hi, int pos) 
 		throws HopsException
 	{
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply() ) //X%*%Y -> matrix(0, )
+		if( HopRewriteUtils.isMatrixMultiply(hi) ) //X%*%Y -> matrix(0, )
 		{
 			Hop left = hi.getInput().get(0);
 			Hop right = hi.getInput().get(1);
@@ -939,15 +846,10 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if(    HopRewriteUtils.isEmpty(left)  //one input empty
 				|| HopRewriteUtils.isEmpty(right) )
 			{
-				//remove unnecessary matrix mult 
-				HopRewriteUtils.removeChildReference(parent, hi);
-				
 				//create datagen and add it to parent
 				Hop hnew = HopRewriteUtils.createDataGenOp(left, right, 0);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
-				parent.refreshSizeInformation();
-				
-				hi = hnew;	
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
+				hi = hnew;
 				
 				LOG.debug("Applied simplifyEmptyMatrixMult");
 			}
@@ -959,7 +861,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	private Hop simplifyIdentityRepMatrixMult(Hop parent, Hop hi, int pos) 
 		throws HopsException
 	{
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply() ) //X%*%Y -> X, if y is matrix(1,1,1)
+		if( HopRewriteUtils.isMatrixMultiply(hi) ) //X%*%Y -> X, if y is matrix(1,1,1)
 		{
 			Hop left = hi.getInput().get(0);
 			Hop right = hi.getInput().get(1);
@@ -969,8 +871,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				right instanceof DataGenOp && ((DataGenOp)right).getOp()==DataGenMethod.RAND
 				&& ((DataGenOp)right).hasConstantValue(1.0)) //matrix(1,)
 			{
-				HopRewriteUtils.removeChildReference(parent, hi);			
-				HopRewriteUtils.addChildReference(parent, left, pos);			
+				HopRewriteUtils.replaceChildReference(parent, hi, left, pos);			
 				hi = left;
 				
 				LOG.debug("Applied simplifyIdentiyMatrixMult");
@@ -983,7 +884,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	private Hop simplifyScalarMatrixMult(Hop parent, Hop hi, int pos) 
 		throws HopsException
 	{
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply() ) //X%*%Y
+		if( HopRewriteUtils.isMatrixMultiply(hi) ) //X%*%Y
 		{
 			Hop left = hi.getInput().get(0);
 			Hop right = hi.getInput().get(1);
@@ -991,49 +892,27 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			// y %*% X -> as.scalar(y) * X
 			if( HopRewriteUtils.isDimsKnown(left) && left.getDim1()==1 && left.getDim2()==1 ) //scalar left
 			{
-				//remove link from parent to matrix mult
-				HopRewriteUtils.removeChildReference(parent, hi);
-			
-				UnaryOp cast = new UnaryOp(left.getName(), DataType.SCALAR, ValueType.DOUBLE, 
-						                   OpOp1.CAST_AS_SCALAR, left);
-				HopRewriteUtils.setOutputParameters(cast, 0, 0, 0, 0, 0);
-				BinaryOp mult = new BinaryOp(cast.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cast, right);
-				HopRewriteUtils.setOutputParameters(mult, right.getDim1(), right.getDim2(), right.getRowsInBlock(), right.getColsInBlock(), -1);
-				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
+				UnaryOp cast = HopRewriteUtils.createUnary(left, OpOp1.CAST_AS_SCALAR);
+				BinaryOp mult = HopRewriteUtils.createBinary(cast, right, OpOp2.MULT);
 				
 				//add mult to parent
-				HopRewriteUtils.addChildReference(parent, mult, pos);			
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, mult, pos);			
+				HopRewriteUtils.cleanupUnreferenced(hi);
 				
 				hi = mult;
-				
 				LOG.debug("Applied simplifyScalarMatrixMult1");
 			}
 			// X %*% y -> X * as.scalar(y)
 			else if( HopRewriteUtils.isDimsKnown(right) && right.getDim1()==1 && right.getDim2()==1 ) //scalar right
 			{
-				//remove link from parent to matrix mult
-				HopRewriteUtils.removeChildReference(parent, hi);
-			
-				UnaryOp cast = new UnaryOp(right.getName(), DataType.SCALAR, ValueType.DOUBLE, 
-						                   OpOp1.CAST_AS_SCALAR, right);
-				HopRewriteUtils.setOutputParameters(cast, 0, 0, 0, 0, 0);
-				BinaryOp mult = new BinaryOp(cast.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cast, left);
-				HopRewriteUtils.setOutputParameters(mult, left.getDim1(), left.getDim2(), left.getRowsInBlock(), left.getColsInBlock(), -1);
-				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
+				UnaryOp cast = HopRewriteUtils.createUnary(right, OpOp1.CAST_AS_SCALAR);
+				BinaryOp mult = HopRewriteUtils.createBinary(cast, left, OpOp2.MULT); 
 				
 				//add mult to parent
-				HopRewriteUtils.addChildReference(parent, mult, pos);			
-				parent.refreshSizeInformation();
+				HopRewriteUtils.replaceChildReference(parent, hi, mult, pos);			
+				HopRewriteUtils.cleanupUnreferenced(hi);
 				
 				hi = mult;
-				
 				LOG.debug("Applied simplifyScalarMatrixMult2");
 			}
 		}
@@ -1046,7 +925,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	{
 		Hop hnew = null;
 		
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply() ) //X%*%Y
+		if( HopRewriteUtils.isMatrixMultiply(hi) ) //X%*%Y
 		{
 			
 			Hop left = hi.getInput().get(0);
@@ -1061,36 +940,22 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				if( right.getDim2()==1 ) //right column vector
 				{
-					//remove link from parent to matrix mult
-					HopRewriteUtils.removeChildReference(parent, hi);
-					
 					//create binary operation over input and right
 					Hop input = left.getInput().get(0); //diag input
-					hnew = new BinaryOp(input.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, input, right);
-					HopRewriteUtils.setOutputParameters(hnew, left.getDim1(), right.getDim2(), left.getRowsInBlock(), left.getColsInBlock(), -1);
-				
+					hnew = HopRewriteUtils.createBinary(input, right, OpOp2.MULT);
+					
 					LOG.debug("Applied simplifyMatrixMultDiag1");
 				}
 				else if( right.getDim2()>1 ) //multi column vector 
 				{
-					//remove link from parent to matrix mult
-					HopRewriteUtils.removeChildReference(parent, hi);
-					
 					//create binary operation over input and right; in contrast to above rewrite,
 					//we need to switch the order because MV binary cell operations require vector on the right
 					Hop input = left.getInput().get(0); //diag input
-					hnew = new BinaryOp(input.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, right, input);
-					HopRewriteUtils.setOutputParameters(hnew, left.getDim1(), right.getDim2(), left.getRowsInBlock(), left.getColsInBlock(), -1);
+					hnew = HopRewriteUtils.createBinary(right, input, OpOp2.MULT);
+					
+					//NOTE: before MV binary cell operations were supported, we replicated the left 
+					//(for a moderate number of columns: 2), but this is no longer required
 					
-					//NOTE: previously to MV binary cell operations we replicated the left (if moderate number of columns: 2)
-					//create binary operation over input and right
-					//Hop input = left.getInput().get(0);
-					//Hop ones = HopRewriteUtils.createDataGenOpByVal(new LiteralOp("1",1), new LiteralOp(String.valueOf(right.getDim2()),right.getDim2()), 1);
-					//Hop repmat = new AggBinaryOp( input.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, AggOp.SUM, input, ones );
-					//HopRewriteUtils.setOutputParameters(repmat, input.getDim1(), ones.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), -1);
-					//hnew = new BinaryOp(input.getName(), DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, repmat, right);
-					//HopRewriteUtils.setOutputParameters(hnew, right.getDim1(), right.getDim2(), right.getRowsInBlock(), right.getColsInBlock(), -1);
-				
 					LOG.debug("Applied simplifyMatrixMultDiag2");
 				}
 			}
@@ -1100,13 +965,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//if one of the above rewrites applied
 		if( hnew !=null ){
-			//cleanup if only consumer of intermediate
-			if( hi.getParent().isEmpty() ) 
-				HopRewriteUtils.removeAllChildReferences( hi );
-			
 			//add mult to parent
-			HopRewriteUtils.addChildReference(parent, hnew, pos);			
-			parent.refreshSizeInformation();
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);			
+			HopRewriteUtils.cleanupUnreferenced(hi);
 			
 			hi = hnew;	
 		}
@@ -1119,41 +980,21 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		if( hi instanceof ReorgOp && ((ReorgOp)hi).getOp()==ReOrgOp.DIAG && hi.getDim2()==1 ) //diagM2V
 		{
 			Hop hi2 = hi.getInput().get(0);
-			if( hi2 instanceof AggBinaryOp && ((AggBinaryOp)hi2).isMatrixMultiply() ) //X%*%Y
+			if( HopRewriteUtils.isMatrixMultiply(hi2) ) //X%*%Y
 			{
 				Hop left = hi2.getInput().get(0);
 				Hop right = hi2.getInput().get(1);
 				
-				//remove link from parent to diag
-				HopRewriteUtils.removeChildReference(parent, hi);
-				
-				//remove links to inputs to matrix mult
-				//removeChildReference(hi2, left);
-				//removeChildReference(hi2, right);
-				
 				//create new operators (incl refresh size inside for transpose)
 				ReorgOp trans = HopRewriteUtils.createTranspose(right);
-				BinaryOp mult = new BinaryOp(right.getName(), right.getDataType(), right.getValueType(), OpOp2.MULT, left, trans);
-				mult.setRowsInBlock(right.getRowsInBlock());
-				mult.setColsInBlock(right.getColsInBlock());
-				mult.refreshSizeInformation();
-				AggUnaryOp rowSum = new AggUnaryOp(right.getName(), right.getDataType(), right.getValueType(), AggOp.SUM, Direction.Row, mult);
-				rowSum.setRowsInBlock(right.getRowsInBlock());
-				rowSum.setColsInBlock(right.getColsInBlock());
-				rowSum.refreshSizeInformation();
+				BinaryOp mult = HopRewriteUtils.createBinary(left, trans, OpOp2.MULT);
+				AggUnaryOp rowSum = HopRewriteUtils.createAggUnaryOp(mult, AggOp.SUM, Direction.Row);
 				
 				//rehang new subdag under parent node
-				HopRewriteUtils.addChildReference(parent, rowSum, pos);				
-				parent.refreshSizeInformation();
-				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
-				if( hi2.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi2 );
-				
-				hi = rowSum;
+				HopRewriteUtils.replaceChildReference(parent, hi, rowSum, pos);				
+				HopRewriteUtils.cleanupUnreferenced(hi, hi2);
 				
+				hi = rowSum;			
 				LOG.debug("Applied simplifyDiagMatrixMult");
 			}	
 		}
@@ -1174,16 +1015,12 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					Hop hi3 = hi2.getInput().get(0);
 					
 					//remove diag operator
-					HopRewriteUtils.removeChildReference(au, hi2);
-					HopRewriteUtils.addChildReference(au, hi3, 0);	
+					HopRewriteUtils.replaceChildReference(au, hi2, hi3, 0);	
+					HopRewriteUtils.cleanupUnreferenced(hi2);
 					
 					//change sum to trace
 					au.setOp( AggOp.TRACE );
 					
-					//cleanup if only consumer of intermediate
-					if( hi2.getParent().isEmpty() ) 
-						HopRewriteUtils.removeAllChildReferences( hi2 );
-					
 					LOG.debug("Applied simplifySumDiagToTrace");
 				}
 			}
@@ -1198,7 +1035,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	{
 		//diag(X)*7 --> diag(X*7) in order to (1) reduce required memory for b(*) and
 		//(2) in order to make the binary operation more efficient (dense vector vs sparse matrix)
-		if( hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==OpOp2.MULT )
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MULT) )
 		{
 			Hop left = hi.getInput().get(0);
 			Hop right = hi.getInput().get(1);
@@ -1279,7 +1116,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	 */
 	private Hop pushdownSumOnAdditiveBinary(Hop parent, Hop hi, int pos) 
 	{
-		//all patterns headed by fiull sum over binary operation
+		//all patterns headed by full sum over binary operation
 		if(    hi instanceof AggUnaryOp //full sum root over binaryop
 			&& ((AggUnaryOp)hi).getDirection()==Direction.RowCol
 			&& ((AggUnaryOp)hi).getOp() == AggOp.SUM 
@@ -1305,13 +1142,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					BinaryOp newBin = HopRewriteUtils.createBinary(sum1, sum2, applyOp);
 
 					//rewire new subdag
-					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);		
-					HopRewriteUtils.addChildReference(parent, newBin, pos);
-					if( hi.getParent().isEmpty() )
-						HopRewriteUtils.removeAllChildReferences(hi);
-					if( bop.getParent().isEmpty() )
-						HopRewriteUtils.removeAllChildReferences(bop);
-					
+					HopRewriteUtils.replaceChildReference(parent, hi, newBin, pos);
+					HopRewriteUtils.cleanupUnreferenced(hi, bop);
 					
 					hi = newBin;
 					
@@ -1362,17 +1194,16 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 1) sum (W * (X - U %*% t(V)) ^ 2) (post weighting)
 			//alternative pattern: sum (W * (U %*% t(V) - X) ^ 2)
-			if( bop.getOp()==OpOp2.MULT && bop.getInput().get(1) instanceof BinaryOp	
+			if( bop.getOp()==OpOp2.MULT && HopRewriteUtils.isBinary(bop.getInput().get(1), OpOp2.POW)
 				&& bop.getInput().get(0).getDataType()==DataType.MATRIX	
 				&& HopRewriteUtils.isEqualSize(bop.getInput().get(0), bop.getInput().get(1)) //prevent mv
-				&& ((BinaryOp)bop.getInput().get(1)).getOp()==OpOp2.POW 
 				&& bop.getInput().get(1).getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)bop.getInput().get(1).getInput().get(1))==2)
 			{
 				Hop W = bop.getInput().get(0);
 				Hop tmp = bop.getInput().get(1).getInput().get(0); //(X - U %*% t(V))
 				
-				if( tmp instanceof BinaryOp && ((BinaryOp)tmp).getOp()==OpOp2.MINUS
+				if( HopRewriteUtils.isBinary(tmp, OpOp2.MINUS)
 					&& HopRewriteUtils.isEqualSize(tmp.getInput().get(0), tmp.getInput().get(1)) //prevent mv	
 					&& tmp.getInput().get(0).getDataType() == DataType.MATRIX )
 				{
@@ -1424,9 +1255,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if( !appliedPattern
 				&& bop.getOp()==OpOp2.POW && bop.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)bop.getInput().get(1))==2
-				&& bop.getInput().get(0) instanceof BinaryOp	
+				&& HopRewriteUtils.isBinary(bop.getInput().get(0), OpOp2.MINUS)	
 				&& bop.getInput().get(0).getDataType()==DataType.MATRIX	
-				&& ((BinaryOp)bop.getInput().get(0)).getOp()==OpOp2.MINUS
 				&& HopRewriteUtils.isEqualSize(bop.getInput().get(0).getInput().get(0), bop.getInput().get(0).getInput().get(1)) //prevent mv
 				&& bop.getInput().get(0).getInput().get(0).getDataType()==DataType.MATRIX)
 			{
@@ -1479,9 +1309,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if( !appliedPattern
 				&& bop.getOp()==OpOp2.POW && bop.getInput().get(1) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)bop.getInput().get(1))==2
-				&& bop.getInput().get(0) instanceof BinaryOp	
+				&& HopRewriteUtils.isBinary(bop.getInput().get(0), OpOp2.MINUS) 	
 				&& bop.getInput().get(0).getDataType()==DataType.MATRIX	
-				&& ((BinaryOp)bop.getInput().get(0)).getOp()==OpOp2.MINUS
 				&& HopRewriteUtils.isEqualSize(bop.getInput().get(0).getInput().get(0), bop.getInput().get(0).getInput().get(1)) //prevent mv
 				&& bop.getInput().get(0).getInput().get(0).getDataType()==DataType.MATRIX)
 			{
@@ -1529,8 +1358,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//relink new hop into original position
 		if( hnew != null ) {
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, hnew, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 			hi = hnew;
 		}
 		
@@ -1542,8 +1370,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	{
 		Hop hnew = null;
 		
-		if(    hi instanceof BinaryOp //all patterns subrooted by W *
-			&& ((BinaryOp) hi).getOp()==OpOp2.MULT
+		if( HopRewriteUtils.isBinary(hi, OpOp2.MULT) //all patterns subrooted by W *
 			&& hi.getDim2() > 1       //not applied for vector-vector mult
 			&& HopRewriteUtils.isEqualSize(hi.getInput().get(0), hi.getInput().get(1)) //prevent mv
 			&& hi.getInput().get(0).getDataType()==DataType.MATRIX 
@@ -1569,7 +1396,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, false, false);
-				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+				hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 				hnew.refreshSizeInformation();
 				
 				appliedPattern = true;
@@ -1579,8 +1406,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//Pattern 2) W * sigmoid(-(Y%*%t(X))) (minus)
 			if(    !appliedPattern 
 				&& uop.getOp() == OpOp1.SIGMOID 
-				&& uop.getInput().get(0) instanceof BinaryOp
-				&& ((BinaryOp)uop.getInput().get(0)).getOp()==OpOp2.MINUS
+				&& HopRewriteUtils.isBinary(uop.getInput().get(0), OpOp2.MINUS)
 				&& uop.getInput().get(0).getInput().get(0) instanceof LiteralOp
 				&& HopRewriteUtils.getDoubleValueSafe(
 				   (LiteralOp)uop.getInput().get(0).getInput().get(0))==0
@@ -1599,7 +1425,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, false, true);
-				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+				hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 				hnew.refreshSizeInformation();
 				
 				appliedPattern = true;
@@ -1609,8 +1435,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//Pattern 3) W * log(sigmoid(Y%*%t(X))) (log)			
 			if(    !appliedPattern 
 				&& uop.getOp() == OpOp1.LOG
-				&& uop.getInput().get(0) instanceof UnaryOp
-				&& ((UnaryOp)uop.getInput().get(0)).getOp() == OpOp1.SIGMOID 
+				&& HopRewriteUtils.isUnary(uop.getInput().get(0), OpOp1.SIGMOID) 
 				&& uop.getInput().get(0).getInput().get(0) instanceof AggBinaryOp
 				&& HopRewriteUtils.isSingleBlock(uop.getInput().get(0).getInput().get(0).getInput().get(0),true) )
 			{
@@ -1626,7 +1451,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, true, false);
-				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+				hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 				hnew.refreshSizeInformation();
 				
 				appliedPattern = true;
@@ -1636,14 +1461,12 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//Pattern 4) W * log(sigmoid(-(Y%*%t(X)))) (log_minus)
 			if(    !appliedPattern 
 				&& uop.getOp() == OpOp1.LOG
-				&& uop.getInput().get(0) instanceof UnaryOp
-				&& ((UnaryOp)uop.getInput().get(0)).getOp() == OpOp1.SIGMOID 
-				&& uop.getInput().get(0).getInput().get(0) instanceof BinaryOp )
+				&& HopRewriteUtils.isUnary(uop.getInput().get(0), OpOp1.SIGMOID) 
+				&& HopRewriteUtils.isBinary(uop.getInput().get(0).getInput().get(0), OpOp2.MINUS) )
 			{
 				BinaryOp bop = (BinaryOp) uop.getInput().get(0).getInput().get(0);
 				
-				if(    bop.getOp() == OpOp2.MINUS 
-					&& bop.getInput().get(0) instanceof LiteralOp
+				if(    bop.getInput().get(0) instanceof LiteralOp
 					&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)bop.getInput().get(0))==0
 					&& bop.getInput().get(1) instanceof AggBinaryOp
 					&& HopRewriteUtils.isSingleBlock(bop.getInput().get(1).getInput().get(0),true))
@@ -1660,7 +1483,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WSIGMOID, W, Y, tX, true, true);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					appliedPattern = true;
@@ -1671,8 +1494,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//relink new hop into original position
 		if( hnew != null ) {
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, hnew, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 			hi = hnew;
 		}
 		
@@ -1687,7 +1509,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//left/right patterns rooted by 'ab - b(div)' or 'ab - b(mult)'
 		//note: we do not rewrite t(X)%*%(w*(X%*%v)) where w and v are vectors (see mmchain ops) 
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply()  
+		if( HopRewriteUtils.isMatrixMultiply(hi)  
 			&& (hi.getInput().get(0) instanceof BinaryOp
 			&& HopRewriteUtils.isValidOp(((BinaryOp)hi.getInput().get(0)).getOp(), LOOKUP_VALID_WDIVMM_BINARY)
 			|| hi.getInput().get(1) instanceof BinaryOp 
@@ -1718,7 +1540,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					boolean mult = ((BinaryOp)right).getOp() == OpOp2.MULT;
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 1, mult, false);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
@@ -1731,10 +1553,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 1e) t(U) %*% (W/(U%*%t(V) + x))
 			if( !appliedPattern
-				&& right instanceof BinaryOp && ((BinaryOp)right).getOp() == LOOKUP_VALID_WDIVMM_BINARY[1] //DIV
+				&& HopRewriteUtils.isBinary(right, LOOKUP_VALID_WDIVMM_BINARY[1]) //DIV
 				&& HopRewriteUtils.isEqualSize(right.getInput().get(0), right.getInput().get(1)) //prevent mv
-				&& right.getInput().get(1) instanceof BinaryOp
-				&& ((BinaryOp) right.getInput().get(1)).getOp() == Hop.OpOp2.PLUS
+				&& HopRewriteUtils.isBinary(right.getInput().get(1), Hop.OpOp2.PLUS)
 				&& right.getInput().get(1).getInput().get(1).getDataType() == DataType.SCALAR
 				&& HopRewriteUtils.isOuterProductLikeMM(right.getInput().get(1).getInput().get(0))
 				&& HopRewriteUtils.isSingleBlock(right.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1753,7 +1574,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 3, false, false); // 3=>DIV_LEFT_EPS
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
@@ -1786,7 +1607,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					boolean mult = ((BinaryOp)left).getOp() == OpOp2.MULT;
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 2, mult, false);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					appliedPattern = true;
@@ -1796,10 +1617,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 2e) (W/(U%*%t(V) + x)) %*% V
 			if( !appliedPattern
-				&& left instanceof BinaryOp && ((BinaryOp)left).getOp() == LOOKUP_VALID_WDIVMM_BINARY[1] //DIV
+				&& HopRewriteUtils.isBinary(left, LOOKUP_VALID_WDIVMM_BINARY[1]) //DIV
 				&& HopRewriteUtils.isEqualSize(left.getInput().get(0), left.getInput().get(1)) //prevent mv
-				&& left.getInput().get(1) instanceof BinaryOp
-				&& ((BinaryOp) left.getInput().get(1)).getOp() == Hop.OpOp2.PLUS
+				&& HopRewriteUtils.isBinary(left.getInput().get(1), Hop.OpOp2.PLUS)
 				&& left.getInput().get(1).getInput().get(1).getDataType() == DataType.SCALAR
 				&& HopRewriteUtils.isOuterProductLikeMM(left.getInput().get(1).getInput().get(0))
 				&& HopRewriteUtils.isSingleBlock(left.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1818,7 +1638,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 4, false, false); // 4=>DIV_RIGHT_EPS
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					appliedPattern = true;
@@ -1828,8 +1648,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 3) t(U) %*% ((X!=0)*(U%*%t(V)-X))
 			if( !appliedPattern
-				&& right instanceof BinaryOp && ((BinaryOp)right).getOp()==LOOKUP_VALID_WDIVMM_BINARY[0] //MULT
-				&& right.getInput().get(1) instanceof BinaryOp && ((BinaryOp)right.getInput().get(1)).getOp()==OpOp2.MINUS	
+				&& HopRewriteUtils.isBinary(right, LOOKUP_VALID_WDIVMM_BINARY[0]) //MULT
+				&& HopRewriteUtils.isBinary(right.getInput().get(1), OpOp2.MINUS)	
 				&& HopRewriteUtils.isOuterProductLikeMM(right.getInput().get(1).getInput().get(0))
 				&& right.getInput().get(1).getInput().get(1).getDataType() == DataType.MATRIX
 				&& HopRewriteUtils.isSingleBlock(right.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1849,7 +1669,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, X, U, V, new LiteralOp(-1), 1, true, true);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
@@ -1862,8 +1682,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 4) ((X!=0)*(U%*%t(V)-X)) %*% V
 			if( !appliedPattern
-				&& left instanceof BinaryOp && ((BinaryOp)left).getOp()==LOOKUP_VALID_WDIVMM_BINARY[0] //MULT	
-				&& left.getInput().get(1) instanceof BinaryOp && ((BinaryOp)left.getInput().get(1)).getOp()==OpOp2.MINUS	
+				&& HopRewriteUtils.isBinary(left, LOOKUP_VALID_WDIVMM_BINARY[0]) //MULT	
+				&& HopRewriteUtils.isBinary(left.getInput().get(1), OpOp2.MINUS)	
 				&& HopRewriteUtils.isOuterProductLikeMM(left.getInput().get(1).getInput().get(0))
 				&& left.getInput().get(1).getInput().get(1).getDataType() == DataType.MATRIX
 				&& HopRewriteUtils.isSingleBlock(left.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1883,7 +1703,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, X, U, V, new LiteralOp(-1), 2, true, true);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					appliedPattern = true;
@@ -1893,8 +1713,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 5) t(U) %*% (W*(U%*%t(V)-X))
 			if( !appliedPattern
-				&& right instanceof BinaryOp && ((BinaryOp)right).getOp()==LOOKUP_VALID_WDIVMM_BINARY[0] //MULT
-				&& right.getInput().get(1) instanceof BinaryOp && ((BinaryOp)right.getInput().get(1)).getOp()==OpOp2.MINUS	
+				&& HopRewriteUtils.isBinary(right, LOOKUP_VALID_WDIVMM_BINARY[0]) //MULT
+				&& HopRewriteUtils.isBinary(right.getInput().get(1), OpOp2.MINUS)	
 				&& HopRewriteUtils.isOuterProductLikeMM(right.getInput().get(1).getInput().get(0))
 				&& right.getInput().get(1).getInput().get(1).getDataType() == DataType.MATRIX
 				&& HopRewriteUtils.isSingleBlock(right.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1914,7 +1734,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					//note: x and w exchanged compared to patterns 1-4, 7
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 1, true, true);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
@@ -1927,8 +1747,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//Pattern 6) (W*(U%*%t(V)-X)) %*% V
 			if( !appliedPattern
-				&& left instanceof BinaryOp && ((BinaryOp)left).getOp()==LOOKUP_VALID_WDIVMM_BINARY[0] //MULT	
-				&& left.getInput().get(1) instanceof BinaryOp && ((BinaryOp)left.getInput().get(1)).getOp()==OpOp2.MINUS	
+				&& HopRewriteUtils.isBinary(left, LOOKUP_VALID_WDIVMM_BINARY[0]) //MULT	
+				&& HopRewriteUtils.isBinary(left.getInput().get(1), OpOp2.MINUS)	
 				&& HopRewriteUtils.isOuterProductLikeMM(left.getInput().get(1).getInput().get(0))
 				&& left.getInput().get(1).getInput().get(1).getDataType() == DataType.MATRIX
 				&& HopRewriteUtils.isSingleBlock(left.getInput().get(1).getInput().get(0).getInput().get(0),true) ) //BLOCKSIZE CONSTRAINT
@@ -1948,7 +1768,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					//note: x and w exchanged compared to patterns 1-4, 7
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 2, true, true);
-					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 					hnew.refreshSizeInformation();
 					
 					appliedPattern = true;
@@ -1959,7 +1779,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//Pattern 7) (W*(U%*%t(V)))
 		if( !appliedPattern
-			&& hi instanceof BinaryOp && ((BinaryOp)hi).getOp()==LOOKUP_VALID_WDIVMM_BINARY[0] //MULT	
+			&& HopRewriteUtils.isBinary(hi, LOOKUP_VALID_WDIVMM_BINARY[0]) //MULT	
 			&& HopRewriteUtils.isEqualSize(hi.getInput().get(0), hi.getInput().get(1)) //prevent mv
 			&& hi.getDim2() > 1 //not applied for vector-vector mult
 			&& hi.getInput().get(0).getDataType() == DataType.MATRIX 
@@ -1982,7 +1802,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 0, true, false);
-				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+				hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 				hnew.refreshSizeInformation();
 				
 				appliedPattern = true;
@@ -1992,8 +1812,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//relink new hop into original position
 		if( hnew != null ) {
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, hnew, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 			hi = hnew;
 		}
 		
@@ -2018,7 +1837,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//Pattern 1) sum( X * log(U %*% t(V)))
 			if( bop.getOp()==OpOp2.MULT && left.getDataType()==DataType.MATRIX		
 				&& HopRewriteUtils.isEqualSize(left, right)  //prevent mb
-				&& right instanceof UnaryOp	&& ((UnaryOp)right).getOp()==OpOp1.LOG
+				&& HopRewriteUtils.isUnary(right, OpOp1.LOG)
 				&& right.getInput().get(0) instanceof AggBinaryOp  //ba gurantees matrices
 				&& HopRewriteUtils.isSingleBlock(right.getInput().get(0).getInput().get(0),true)) //BLOCKSIZE CONSTRAINT
 			{
@@ -2033,7 +1852,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 				hnew = new QuaternaryOp(hi.getName(), DataType.SCALAR, ValueType.DOUBLE, OpOp4.WCEMM, X, U, V,
 						new LiteralOp(0.0), 0, false, false);
-				HopRewriteUtils.setOutputBlocksizes(hnew, X.getRowsInBlock(), X.getColsInBlock());
+				hnew.setOutputBlocksizes(X.getRowsInBlock(), X.getColsInBlock());
 				appliedPattern = true;
 				
 				LOG.debug("Applied simplifyWeightedCEMM (line "+hi.getBeginLine()+")");					
@@ -2043,9 +1862,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if( !appliedPattern
 				&& bop.getOp()==OpOp2.MULT && left.getDataType()==DataType.MATRIX		
 				&& HopRewriteUtils.isEqualSize(left, right)
-				&& right instanceof UnaryOp	&& ((UnaryOp)right).getOp()==OpOp1.LOG
-				&& right.getInput().get(0) instanceof BinaryOp
-				&& ((BinaryOp)right.getInput().get(0)).getOp() == OpOp2.PLUS
+				&& HopRewriteUtils.isUnary(right, OpOp1.LOG)
+				&& HopRewriteUtils.isBinary(right.getInput().get(0), OpOp2.PLUS)
 				&& right.getInput().get(0).getInput().get(0) instanceof AggBinaryOp
 				&& right.getInput().get(0).getInput().get(1) instanceof LiteralOp
 				&& right.getInput().get(0).getInput().get(1).getDataType() == DataType.SCALAR
@@ -2063,7 +1881,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 				hnew = new QuaternaryOp(hi.getName(), DataType.SCALAR, ValueType.DOUBLE, 
 						OpOp4.WCEMM, X, U, V, eps, 1, false, false); // 1 => BASIC_EPS
-				HopRewriteUtils.setOutputBlocksizes(hnew, X.getRowsInBlock(), X.getColsInBlock());
+				hnew.setOutputBlocksizes(X.getRowsInBlock(), X.getColsInBlock());
 					
 				LOG.debug("Applied simplifyWeightedCEMMEps (line "+hi.getBeginLine()+")");					
 			}
@@ -2071,8 +1889,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//relink new hop into original position
 		if( hnew != null ) {
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, hnew, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 			hi = hnew;
 		}
 		
@@ -2109,7 +1926,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				
 			hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 					  OpOp4.WUMM, W, U, V, mult, op, null);
-			HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+			hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 			hnew.refreshSizeInformation();
 			
 			appliedPattern = true;
@@ -2162,7 +1979,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WUMM, W, U, V, mult, null, op);
-				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+				hnew.setOutputBlocksizes(W.getRowsInBlock(), W.getColsInBlock());
 				hnew.refreshSizeInformation();
 				
 				appliedPattern = true;
@@ -2173,8 +1990,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		
 		//relink new hop into original position
 		if( hnew != null ) {
-			HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-			HopRewriteUtils.addChildReference(parent, hnew, pos);
+			HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 			hi = hnew;
 		}
 		
@@ -2207,7 +2023,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			Hop hi2 = hi.getInput().get(0); //check for ^2 w/o multiple consumers
 			//check for sum(v^2), might have been rewritten from sum(v*v)
-			if( hi2 instanceof BinaryOp && ((BinaryOp)hi2).getOp()==OpOp2.POW
+			if( HopRewriteUtils.isBinary(hi2, OpOp2.POW)
 				&& hi2.getInput().get(1) instanceof LiteralOp 
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)hi2.getInput().get(1))==2
 				&& hi2.getParent().size() == 1 ) //no other consumer than sum
@@ -2217,11 +2033,10 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				baRight = input;
 			}
 			//check for sum(v1*v2), but prevent to rewrite sum(v1*v2*v3) which is later compiled into a ta+* lop
-			else if(   hi2 instanceof BinaryOp && ((BinaryOp)hi2).getOp()==OpOp2.MULT
+			else if( HopRewriteUtils.isBinary(hi2, OpOp2.MULT, 1) //no other consumer than sum
 					&& hi2.getInput().get(0).getDim2()==1 && hi2.getInput().get(1).getDim2()==1
-					&& hi2.getParent().size() == 1  //no other consumer than sum
-					&& !(hi2.getInput().get(0) instanceof BinaryOp && ((BinaryOp)hi2.getInput().get(0)).getOp()==OpOp2.MULT)
-					&& !(hi2.getInput().get(1) instanceof BinaryOp && ((BinaryOp)hi2.getInput().get(1)).getOp()==OpOp2.MULT))
+					&& !HopRewriteUtils.isBinary(hi2.getInput().get(0), OpOp2.MULT)
+					&& !HopRewriteUtils.isBinary(hi2.getInput().get(1), OpOp2.MULT) )
 			{
 				baLeft = hi2.getInput().get(0);
 				baRight = hi2.getInput().get(1);
@@ -2230,25 +2045,14 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//perform actual rewrite (if necessary)
 			if( baLeft != null && baRight != null  )
 			{
-				//remove link from parent to diag
-				HopRewriteUtils.removeChildReference(parent, hi);
-				
 				//create new operator chain
 				ReorgOp trans = HopRewriteUtils.createTranspose(baLeft);
 				AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(trans, baRight);
-				
-				UnaryOp cast = new UnaryOp(baLeft.getName(), DataType.SCALAR, ValueType.DOUBLE, OpOp1.CAST_AS_SCALAR, mmult);
-				HopRewriteUtils.setOutputParameters(cast, 0, 0, 0, 0, -1);
+				UnaryOp cast = HopRewriteUtils.createUnary(mmult, OpOp1.CAST_AS_SCALAR);
 				
 				//rehang new subdag under parent node
-				HopRewriteUtils.addChildReference(parent, cast, pos);				
-				parent.refreshSizeInformation();
-				
-				//cleanup if only consumer of intermediate
-				if( hi.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi );
-				if( hi2.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hi2 );
+				HopRewriteUtils.replaceChildReference(parent, hi, cast, pos);				
+				HopRewriteUtils.cleanupUnreferenced(hi, hi2);
 				
 				hi = cast;
 				
@@ -2277,7 +2081,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			Hop sumInput = hi.getInput().get(0);
 
 			// if input to SUM is POW(X,2), and no other consumers of the POW(X,2) HOP
-			if (sumInput instanceof BinaryOp && ((BinaryOp) sumInput).getOp() == OpOp2.POW
+			if( HopRewriteUtils.isBinary(sumInput, OpOp2.POW)
 					&& sumInput.getInput().get(1) instanceof LiteralOp
 					&& HopRewriteUtils.getDoubleValue((LiteralOp) sumInput.getInput().get(1)) == 2
 					&& sumInput.getParent().size() == 1) {
@@ -2286,24 +2090,13 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				// if X is NOT a column vector
 				if (x.getDim2() > 1) {
 					// perform rewrite from SUM(POW(X,2)) to SUM_SQ(X)
-					DataType dt = hi.getDataType();
-					ValueType vt = hi.getValueType();
 					Direction dir = ((AggUnaryOp) hi).getDirection();
-					long brlen = hi.getRowsInBlock();
-					long bclen = hi.getColsInBlock();
-					AggUnaryOp sumSq = new AggUnaryOp("sumSq", dt, vt, AggOp.SUM_SQ, dir, x);
-					HopRewriteUtils.setOutputBlocksizes(sumSq, brlen, bclen);
-					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-					HopRewriteUtils.addChildReference(parent, sumSq, pos);
-
-					// cleanup
-					if (hi.getParent().isEmpty())
-						HopRewriteUtils.removeAllChildReferences(hi);
-					if(sumInput.getParent().isEmpty())
-						HopRewriteUtils.removeAllChildReferences(sumInput);
-
-					// replace current HOP with new SUM_SQ HOP
+					AggUnaryOp sumSq = HopRewriteUtils.createAggUnaryOp(x, AggOp.SUM_SQ, dir);
+					HopRewriteUtils.replaceChildReference(parent, hi, sumSq, pos);
+					HopRewriteUtils.cleanupUnreferenced(hi, sumInput);
 					hi = sumSq;
+					
+					LOG.debug("Applied fuseSumSquared.");
 				}
 			}
 		}
@@ -2358,8 +2151,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			
 			//rewire parent-child operators if rewrite applied
 			if( ternop != null ) { 
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, ternop, pos);
+				HopRewriteUtils.replaceChildReference(parent, hi, ternop, pos);
 				hi = ternop;
 			}
 		}
@@ -2421,15 +2213,9 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 						hnew = null;
 				}
 				
-				if( hnew != null )
-				{
-					//remove unnecessary matrix mult 
-					HopRewriteUtils.removeChildReference(parent, hi);
-					
+				if( hnew != null ) {
 					//create datagen and add it to parent
-					HopRewriteUtils.addChildReference(parent, hnew, pos);
-					parent.refreshSizeInformation();
-					
+					HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
 					hi = hnew;
 					
 					LOG.debug("Applied simplifyEmptyBinaryOperation");
@@ -2460,12 +2246,12 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 	private Hop reorderMinusMatrixMult(Hop parent, Hop hi, int pos) 
 		throws HopsException
 	{
-		if( hi instanceof AggBinaryOp && ((AggBinaryOp)hi).isMatrixMultiply() ) //X%*%Y
+		if( HopRewriteUtils.isMatrixMultiply(hi) ) //X%*%Y
 		{
 			Hop hileft = hi.getInput().get(0);
 			Hop hiright = hi.getInput().get(1);
 			
-			if( hileft instanceof BinaryOp && ((BinaryOp)hileft).getOp()==OpOp2.MINUS  //X=-Z
+			if( HopRewriteUtils.isBinary(hileft, OpOp2.MINUS)  //X=-Z
 				&& hileft.getInput().get(0) instanceof LiteralOp 
 				&& HopRewriteUtils.getDoubleValue((LiteralOp)hileft.getInput().get(0))==0.0 
 				&& hi.dimsKnown() && hileft.getInput().get(1).dimsKnown()   //size comparison
@@ -2480,9 +2266,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				ArrayList<Hop> parents = (ArrayList<Hop>) hi.getParent().clone();
 				
 				//create new operators 
-				BinaryOp minus = new BinaryOp(hi.getName(), hi.getDataType(), hi.getValueType(), OpOp2.MINUS, new LiteralOp(0), hi);			
-				minus.setRowsInBlock(hi.getRowsInBlock());
-				minus.setColsInBlock(hi.getColsInBlock());
+				BinaryOp minus = HopRewriteUtils.createBinary(new LiteralOp(0), hi, OpOp2.MINUS);
 				
 				//rehang minus under all parents
 				for( Hop p : parents ) {
@@ -2495,14 +2279,13 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				HopRewriteUtils.addChildReference(hi, hi2, 0);
 				
 				//cleanup if only consumer of minus
-				if( hileft.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hileft );
+				HopRewriteUtils.cleanupUnreferenced(hileft);
 				
 				hi = minus;
 				
 				LOG.debug("Applied reorderMinusMatrixMult (line "+hi.getBeginLine()+").");
 			}
-			else if( hiright instanceof BinaryOp && ((BinaryOp)hiright).getOp()==OpOp2.MINUS  //X=-Z
+			else if( HopRewriteUtils.isBinary(hiright, OpOp2.MINUS)  //X=-Z
 					&& hiright.getInput().get(0) instanceof LiteralOp 
 					&& HopRewriteUtils.getDoubleValue((LiteralOp)hiright.getInput().get(0))==0.0
 					&& hi.dimsKnown() && hiright.getInput().get(1).dimsKnown()     //size comparison
@@ -2517,9 +2300,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				ArrayList<Hop> parents = (ArrayList<Hop>) hi.getParent().clone();
 				
 				//create new operators 
-				BinaryOp minus = new BinaryOp(hi.getName(), hi.getDataType(), hi.getValueType(), OpOp2.MINUS, new LiteralOp(0), hi);			
-				minus.setRowsInBlock(hi.getRowsInBlock());
-				minus.setColsInBlock(hi.getColsInBlock());
+				BinaryOp minus = HopRewriteUtils.createBinary(new LiteralOp(0), hi, OpOp2.MINUS);
 				
 				//rehang minus under all parents
 				for( Hop p : parents ) {
@@ -2532,8 +2313,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				HopRewriteUtils.addChildReference(hi, hi2, 1);
 				
 				//cleanup if only consumer of minus
-				if( hiright.getParent().isEmpty() ) 
-					HopRewriteUtils.removeAllChildReferences( hiright );
+				HopRewriteUtils.cleanupUnreferenced(hiright);
 				
 				hi = minus;
 				
@@ -2592,8 +2372,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hi.refreshSizeInformation();
 			
 			//cleanup if only consumer of intermediate
-			if( hi2.getParent().isEmpty() ) 
-				HopRewriteUtils.removeAllChildReferences( hi2 );	
+			HopRewriteUtils.cleanupUnreferenced(hi2);
 		}
 		
 		return hi;
@@ -2612,11 +2391,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			if( HopRewriteUtils.isDimsKnown(right) && right.getDim1()==1 && right.getDim2()==1 ) //scalar right
 			{
 				//remove link to right child and introduce cast
-				HopRewriteUtils.removeChildReference(hi, right);
-				UnaryOp cast = new UnaryOp(right.getName(), DataType.SCALAR, ValueType.DOUBLE, 
-						                   OpOp1.CAST_AS_SCALAR, right);
-				HopRewriteUtils.setOutputParameters(cast, 0, 0, 0, 0, 0);
-				HopRewriteUtils.addChildReference(hi, cast, 1);			
+				UnaryOp cast = HopRewriteUtils.createUnary(right, OpOp1.CAST_AS_SCALAR);
+				HopRewriteUtils.replaceChildReference(hi, right, cast, 1);			
 				
 				LOG.debug("Applied simplifyScalarMVBinaryOperation.");
 			}
@@ -2631,8 +2407,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		//sum(ppred(X,0,"!=")) -> literal(nnz(X)), if nnz known		
 		if(    hi instanceof AggUnaryOp && ((AggUnaryOp)hi).getOp()==AggOp.SUM  //sum
 			&& ((AggUnaryOp)hi).getDirection() == Direction.RowCol	            //full aggregate
-			&& hi.getInput().get(0) instanceof BinaryOp 
-			&& ((BinaryOp)hi.getInput().get(0)).getOp()==OpOp2.NOTEQUAL )
+			&& HopRewriteUtils.isBinary(hi.getInput().get(0), OpOp2.NOTEQUAL) )
 		{
 			Hop ppred = hi.getInput().get(0);
 			Hop X = null;
@@ -2650,13 +2425,10 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			//apply rewrite if known nnz 
 			if( X != null && X.getNnz() > 0 ){
 				Hop hnew = new LiteralOp(X.getNnz());
-				HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
-				HopRewriteUtils.addChildReference(parent, hnew, pos);
-				
-				if( hi.getParent().isEmpty() )
-					HopRewriteUtils.removeAllChildReferences( hi );
-				
+				HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos);
+				HopRewriteUtils.cleanupUnreferenced(hi);
 				hi = hnew;
+				
 				LOG.debug("Applied simplifyNnzComputation.");	
 			}
 		}


[5/6] incubator-systemml git commit: [SYSTEMML-1302] Remove parfor perftesttool, cleanup heuristic optimizer

Posted by mb...@apache.org.
[SYSTEMML-1302] Remove parfor perftesttool, cleanup heuristic optimizer

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/35da413a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/35da413a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/35da413a

Branch: refs/heads/master
Commit: 35da413a6a23245902fae145a5e9a80981154161
Parents: 1fe1a02
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Feb 22 22:26:39 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 24 12:27:28 2017 -0800

----------------------------------------------------------------------
 .../sysml/parser/ParForStatementBlock.java      |   15 +-
 .../controlprogram/ParForProgramBlock.java      |    8 +-
 .../parfor/opt/CostEstimator.java               |   23 +-
 .../parfor/opt/CostEstimatorHops.java           |    3 +-
 .../parfor/opt/CostEstimatorRuntime.java        |  187 +--
 .../controlprogram/parfor/opt/CostFunction.java |  151 --
 .../parfor/opt/OptTreeConverter.java            |  104 +-
 .../parfor/opt/OptTreePlanMappingAbstract.java  |   10 +-
 .../parfor/opt/OptTreePlanMappingRuntime.java   |    3 +-
 .../parfor/opt/OptimizationWrapper.java         |   27 +-
 .../controlprogram/parfor/opt/Optimizer.java    |    9 +-
 .../parfor/opt/OptimizerConstrained.java        |    3 +-
 .../parfor/opt/OptimizerHeuristic.java          |   19 +-
 .../controlprogram/parfor/opt/PerfTestDef.java  |   99 --
 .../parfor/opt/PerfTestExtFunctCP.java          |   94 --
 .../parfor/opt/PerfTestMemoryObserver.java      |  105 --
 .../controlprogram/parfor/opt/PerfTestTool.java | 1411 ------------------
 .../parfor/opt/PerfTestToolRegression.dml       |   59 -
 .../parfor/ParForRulebasedOptimizerTest.java    |  149 +-
 .../functions/parfor/parfor_optimizer1b.dml     |   53 +
 .../functions/parfor/parfor_optimizer2b.dml     |  277 ++++
 .../functions/parfor/parfor_optimizer3b.dml     |   52 +
 22 files changed, 551 insertions(+), 2310 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/parser/ParForStatementBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/ParForStatementBlock.java b/src/main/java/org/apache/sysml/parser/ParForStatementBlock.java
index bd92a15..5ca0cbc 100644
--- a/src/main/java/org/apache/sysml/parser/ParForStatementBlock.java
+++ b/src/main/java/org/apache/sysml/parser/ParForStatementBlock.java
@@ -241,20 +241,7 @@ public class ParForStatementBlock extends ForStatementBlock
 					}
 					else //default case
 						params.put(key, _paramDefaults.get(key));
-				}
-			
-			//check for disabled parameters values
-			if( params.containsKey( OPT_MODE ) )
-			{
-				String optStr = params.get( OPT_MODE );
-				if(    optStr.equals(POptMode.HEURISTIC.toString()) 
-					|| optStr.equals(POptMode.GREEDY.toString()) 
-					|| optStr.equals(POptMode.FULL_DP.toString())   ) 
-				{ //always unconditional
-					raiseValidateError("Sorry, parfor optimization mode '"+optStr+"' is disabled for external usage.", false);
-				}
-			}
-			
+				}			
 		}
 		else
 		{

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
index f6c90f3..af3a0d1 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
@@ -195,12 +195,10 @@ public class ParForProgramBlock extends ForProgramBlock
 	
 	//optimizer
 	public enum POptMode{
-		NONE,       //no optimization, use defaults and specified parameters
-		RULEBASED, //some simple rule-based rewritings (affects only parfor PB) - similar to HEURISTIC but no exec time estimates
+		NONE,        //no optimization, use defaults and specified parameters
+		RULEBASED,   //rule-based rewritings with memory constraints 
 		CONSTRAINED, //same as rule-based but with given params as constraints
-		HEURISTIC, //some simple cost-based rewritings (affects only parfor PB)
-		GREEDY,     //greedy cost-based optimization algorithm (potentially local optimum, affects all instructions)
-		FULL_DP,    //full cost-based optimization algorithm (global optimum, affects all instructions)				
+		HEURISTIC,   //same as rule-based but with time-based cost estimates
 	}
 		
 	// internal parameters

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
index 6e428d1..bb3ca88 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
@@ -184,17 +184,12 @@ public abstract class CostEstimator
 		return val;
 	}
 
-	protected double getDefaultEstimate(TestMeasure measure) 
-	{
-		double val = -1;
-		
-		switch( measure )
-		{
-			case EXEC_TIME: val = DEFAULT_TIME_ESTIMATE; break;
-			case MEMORY_USAGE: val = DEFAULT_MEM_ESTIMATE_CP; break;
-		}
-		
-		return val;
+	protected double getDefaultEstimate(TestMeasure measure)  {
+		switch( measure ) {
+			case EXEC_TIME:    return DEFAULT_TIME_ESTIMATE;
+			case MEMORY_USAGE: return DEFAULT_MEM_ESTIMATE_CP;
+		}		
+		return -1;
 	}
 
 	protected double getMaxEstimate( TestMeasure measure, ArrayList<OptNode> nodes, ExecType et ) 
@@ -202,11 +197,7 @@ public abstract class CostEstimator
 	{
 		double max = Double.MIN_VALUE; //smallest positive value
 		for( OptNode n : nodes )
-		{
-			double tmp = getEstimate( measure, n, et );
-			if( tmp > max )
-				max = tmp;
-		}
+			max = Math.max(max, getEstimate(measure, n, et));
 		return max;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
index 02ba9ed..b65d80f 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
@@ -42,8 +42,7 @@ public class CostEstimatorHops extends CostEstimator
 	}
 	
 	
-	public CostEstimatorHops( OptTreePlanMappingAbstract map )
-	{
+	public CostEstimatorHops( OptTreePlanMappingAbstract map ) {
 		_map = map;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
index cbb8260..442ad48 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
@@ -20,178 +20,67 @@
 package org.apache.sysml.runtime.controlprogram.parfor.opt;
 
 
-import org.apache.sysml.lops.Lop;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.hops.cost.CostEstimationWrapper;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVariable;
+import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
+import org.apache.sysml.runtime.controlprogram.ProgramBlock;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
 
 /**
- * Cost Estimator for runtime programs. It uses a offline created performance profile
- * (see PerfTestTool) in order to estimate execution time, memory consumption etc of
- * instructions and program blocks with regard to given data characteristics (e.g., 
- * dimensionality, data format, sparsity) and program parameters (e.g., degree of parallelism).
- * If no performance profile cost function exists for a given TestVariables, TestMeasures, and
- * instructions combination, default values are used. Furthermore, the cost estimator provides
- * basic functionalities for estimation of cardinality and sparsity of intermediate results.
+ * Cost estimator for runtime programs. Previously this estimator used an offline created
+ * performance profile. Since SystemML 1.0, this estimator uses a time-based cost model
+ * that relies on floating point operations and I/O, which does not require explicit profiling.
  * 
- * TODO: inst names as constants in perftesttool
- * TODO: complexity corrections for sparse matrices
  */
-@SuppressWarnings("deprecation")
 public class CostEstimatorRuntime extends CostEstimator
 {	
+	private final CostEstimatorHops _costMem;
+	private final OptTreePlanMappingAbstract _map;
+	private final ExecutionContext _ec;
 	
-	//internal estimation parameters
-	public static final boolean COMBINE_ESTIMATION_PATHS = true;
+	public CostEstimatorRuntime(OptTreePlanMappingAbstract map, LocalVariableMap vars ) {
+		_costMem = new CostEstimatorHops(map);
+		_map = map;
 		
+		//construct execution context as wrapper to hand over
+		//deep copied symbol table to cost estimator
+		_ec = ExecutionContextFactory.createContext();
+		_ec.setVariables(vars);
+	}
+	
 	@Override
 	public double getLeafNodeEstimate( TestMeasure measure, OptNode node ) 
 		throws DMLRuntimeException
 	{
-		double val = -1;
-
-		String str = node.getInstructionName();//node.getParam(ParamType.OPSTRING);
-		OptNodeStatistics stats = node.getStatistics();
-		DataFormat df = stats.getDataFormat();
-
-		double dim1 = stats.getDim1();
-		double dim2 = Math.max(stats.getDim2(), stats.getDim3()); //using max useful if just one side known
-		double dim3 = stats.getDim4();
-		double sparsity = stats.getSparsity();
-		val = getEstimate(measure, str, dim1, dim2, dim3, sparsity, df);
-		
-		//FIXME just for test until cost functions for MR are trained
-		if( node.getExecType() == OptNode.ExecType.MR )
-			val = 60000; //1min or 60k
+		//use CostEstimatorHops to get the memory estimate
+		if( measure == TestMeasure.MEMORY_USAGE )
+			return _costMem.getLeafNodeEstimate(measure, node);
 		
-		//System.out.println("measure="+measure+", operation="+str+", val="+val);
-		
-		return val;
+		//redirect to exec-type-specific estimate
+		ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
+		return getLeafNodeEstimate(measure, node, node.isCPOnly() ? ExecType.CP : REMOTE);
 	}
 	
 	@Override
 	public double getLeafNodeEstimate( TestMeasure measure, OptNode node, ExecType et ) 
 			throws DMLRuntimeException
 	{
-		//TODO for the moment invariant of et
-		
-		return getLeafNodeEstimate(measure, node);
-	}
-
-	public double getEstimate( TestMeasure measure, String instName, double dim1, double dim2, double dim3, double sparsity, DataFormat dataformat ) 
-		throws DMLRuntimeException
-	{
-		return getEstimate(measure, instName, dim1, dim2, dim3, sparsity, DEFAULT_EST_PARALLELISM, dataformat);
-	}
-
-	public double getEstimate( TestMeasure measure, String instName, double dim1, double dim2, double dim3, double sparsity, double parallelism, DataFormat dataformat )
-		throws DMLRuntimeException
-	{
-		double ret = -1;
-		double datasize = -1;
-		
-		if( instName.equals("CP"+Lop.OPERAND_DELIMITOR+"ba+*") )
-			datasize = (dim1*dim2 + dim2*dim3 + dim1*dim3)/3;
-		else
-			datasize = dim1*dim2;
-		
-		//get basic cost functions
-		CostFunction df = PerfTestTool.getCostFunction(instName, measure, TestVariable.DATA_SIZE, dataformat);
-		CostFunction sf = PerfTestTool.getCostFunction(instName, measure, TestVariable.SPARSITY, dataformat);
-		
-		if( df == null || sf == null )
-		{
-			return getDefaultEstimate( measure );
-		}
-		
-		//core merge datasize and sparsity
+		//use CostEstimatorHops to get the memory estimate
+		if( measure == TestMeasure.MEMORY_USAGE )
+			return _costMem.getLeafNodeEstimate(measure, node, et);
 		
-		//complexity corrections (inc. offset correction)
-		if( !df.isMultiDim() ) 
-		{
-			
-			ret =  aggregate( df, sf, 
-			          datasize, PerfTestTool.DEFAULT_DATASIZE, 
-			          sparsity, PerfTestTool.DEFAULT_SPARSITY );	
-			
-			//System.out.println("before correction = "+ret);
-			
-			double offset = df.estimate(0);
-			double ddim   = Math.sqrt(datasize);
-			double assumedC = -1;
-			double realC = -1;
-			
-			if( instName.equals("CP"+Lop.OPERAND_DELIMITOR+"ba+*") )
-			{
-				switch( measure )
-				{
-					case EXEC_TIME:
-						assumedC = 2*ddim * ddim * ddim + ddim * ddim;
-						if( dataformat==DataFormat.DENSE )
-							realC = 2*dim1 * dim2 * dim3 + dim1 * dim3;
-						else if( dataformat==DataFormat.SPARSE ) 
-							realC = 2*dim1 * dim2 * dim3 + dim1 * dim3;
-						break;
-					case MEMORY_USAGE:
-						assumedC = 3*ddim*ddim;
-						if( dataformat==DataFormat.DENSE )
-							realC = dim1 * dim2 + dim2 * dim3 + dim1 * dim3;
-					    else if( dataformat==DataFormat.SPARSE ) 
-					    	realC = dim1 * dim2 + dim2 * dim3 + dim1 * dim3;
-						break;
-				}
-				//actual correction (without offset)
-				ret = (ret-offset) * realC/assumedC + offset;
-			}
-			
-			/*NEW COMPLEXITY CORRECTIONS GO HERE*/
-		}
-		else
-		{
-			double ddim = Math.sqrt(PerfTestTool.DEFAULT_DATASIZE);
-			
-			ret =  aggregate( df, sf, 
-			          new double[]{dim1,dim2,dim3}, new double[]{ddim,ddim,ddim}, 
-			          sparsity, PerfTestTool.DEFAULT_SPARSITY );	
-			
+		//use static cost estimator based on floating point operations
+		//(currently only called for entire parfor program in order to
+		//decide for LOCAL vs REMOTE parfor execution)
+		double ret = DEFAULT_TIME_ESTIMATE;
+		boolean isCP = (et == ExecType.CP || et == null);		
+		if( !node.isLeaf() && isCP ) {
+			ProgramBlock pb = (ProgramBlock)_map.getMappedProg(node.getID())[1];
+			ret = CostEstimationWrapper.getTimeEstimate(pb, _ec, true);				
 		}
-
-		return ret;
-	}
-
-	private static double aggregate( CostFunction f1, CostFunction f2, double x1, double d1, double x2, double d2 )
-	{
-		double val11 = f1.estimate(x1);
-		double val12 = f1.estimate(d1);
-		double val21 = f2.estimate(x2);
-		double val22 = f2.estimate(d2);
-		
-		//estimate combined measure
-		double ret;
-		if( COMBINE_ESTIMATION_PATHS )
-			ret = ((val11 * val21 / val22) + (val21 * val11 / val12)) / 2;
-		else
-			ret = (val11 * val21 / val22);
-		
 		return ret;
 	}
-
-	private static double aggregate( CostFunction f1, CostFunction f2, double[] x1, double[] d1, double x2, double d2 )
-	{
-		double val11 = f1.estimate(x1);
-		double val12 = f1.estimate(d1);
-		double val21 = f2.estimate(x2);
-		double val22 = f2.estimate(d2);
-		
-		//estimate combined measure
-		double ret;
-		if( COMBINE_ESTIMATION_PATHS )
-			ret = ((val11 * val21 / val22) + (val21 * val11 / val12)) / 2;
-		else
-			ret = (val11 * val21 / val22);
-		
-		return ret;
-	}
-
-	
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostFunction.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostFunction.java
deleted file mode 100644
index 4bf2bd7..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostFunction.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-
-/**
- * Generic cost function of the form <code>y = f( X )</code>, where y is a TestMeasure 
- * (e.g., execution time, memory consumption) and X is a vector of input TestVariable values.
- * 
- * This is used for two different use cases (1) polynomial function, for one input parameter
- * (e.g., y=f(x): y = f_0 + f_1*x^1 + f_2*x^2 + ...) and (2) multidimensional functions 
- * (e.g., y=f(x1, x2, ...) with poynomials for all involved input parameters.
- *
- */
-public class CostFunction 
-{
-
-	
-	
-	protected static final Log LOG = LogFactory.getLog(CostFunction.class.getName());
-    
-	public static final boolean PREVENT_NEGATIVE_ESTIMATES = true;
-	
-	private double[] _params    = null;
-	private boolean  _multiDim  = false;
-	
-	public CostFunction( double[] params, boolean multiDim )
-	{
-		_params    = params;
-		_multiDim  = multiDim; 
-	}
-
-	public boolean isMultiDim()
-	{
-		return _multiDim;
-	}
-
-	public double estimate( double in )
-	{
-		double costs = 0;
-		
-		//compute the estimate for arbitrary orders of F
-		if( _params != null )
-			for( int i = 0; i<_params.length; i++ )
-			{
-				//test
-				double v1 = in;
-				double v2 = Math.pow(in, i);
-				if( i>1 && Math.abs(Math.sqrt( v2 ) - v1) > 1.0 ) //issue if larger than 1ms or 1byte
-				{
-					LOG.error("Numerical stability issue: " + v1 + " vs " + v2 );
-					continue;
-				}
-				//end test
-				
-				costs += _params[i] * Math.pow(in, i);
-			}
-		
-		costs = correctEstimate(costs);
-		
-		return costs;
-	}
-
-	public double estimate( double[] in )  
-	{
-		double costs = 0;
-		int len = in.length;
-		
-		if( _params != null )
-		{
-			costs = _params[0]; //intercept
-		
-			for( int i=0; i<len; i++ )
-				costs += _params[i+1] * in[ i ];
-			
-			for( int i=0; i<len; i++ )
-				costs += _params[len+i+1] * Math.pow(in[ i ],2);
-			
-			int ix=0;
-			for( int j=0; j<len-1; j++ )
-				for( int k=j+1; k<len; k++, ix++ )
-					costs += _params[1+2*len+ix] * in[j]*in[k];
-			
-			//double tmp=1;
-			//for( int i=0; i<len; i++ )
-			//	tmp*=in[i];
-			//costs += _params[_params.length-1]*tmp;
-		}
-	
-		costs = correctEstimate(costs);
-		
-		return costs;
-	}
-
-	public double[] getParams()
-	{
-		return _params;
-	}
-		
-	@Override
-	public String toString()
-	{
-		StringBuilder sb = new StringBuilder( "f(x) = " );
-		
-		//compute the estimate for arbitrary orders of F
-		if( _params != null )
-			for( int i = 0; i<_params.length; i++ )
-			{
-				if( i > 0 )
-					sb.append( " + " );
-				sb.append( _params[i] );
-				sb.append( " * x^" );
-				sb.append( i );
-			}
-		
-		return sb.toString();		
-	}
-
-	private double correctEstimate( double cost )
-	{
-		double ret = cost;
-		
-		//check for invalid estimates (due to polynomial functions)
-		if( PREVENT_NEGATIVE_ESTIMATES )
-		{
-			ret = (ret < 0) ? 0 : ret;
-		}
-		
-		return ret;
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
index 44d4de2..80badf9 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
@@ -44,7 +44,6 @@ import org.apache.sysml.parser.ParForStatement;
 import org.apache.sysml.parser.ParForStatementBlock;
 import org.apache.sysml.parser.StatementBlock;
 import org.apache.sysml.parser.WhileStatement;
-import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.WhileStatementBlock;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
@@ -55,25 +54,17 @@ import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.ProgramBlock;
 import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.NodeType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.Optimizer.PlanInputType;
 import org.apache.sysml.runtime.instructions.Instruction;
 import org.apache.sysml.runtime.instructions.MRJobInstruction;
-import org.apache.sysml.runtime.instructions.cp.ComputationCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.Data;
 import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction;
 import org.apache.sysml.runtime.instructions.cpfile.MatrixIndexingCPFileInstruction;
 import org.apache.sysml.runtime.instructions.cpfile.ParameterizedBuiltinCPFileInstruction;
 import org.apache.sysml.runtime.instructions.spark.SPInstruction;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
 /**
  * Converter for creating an internal plan representation for a given runtime program
@@ -82,7 +73,6 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
  * NOTE: currently only one abstract and one runtime plan at a time.
  * This implies that only one parfor optimization can happen at a time.
  */
-@SuppressWarnings("deprecation")
 public class OptTreeConverter 
 {		
 	
@@ -689,98 +679,10 @@ public class OptTreeConverter
 	private static OptNodeStatistics analyzeStatistics(Instruction inst, OptNode on, LocalVariableMap vars) 
 		throws DMLRuntimeException 
 	{
-		OptNodeStatistics ret = null;
-		String instName = on.getInstructionName();
+		//since the performance test tool for offline profiling has been removed,
+		//we return default values
 		
-		if( PerfTestTool.isRegisteredInstruction(instName) )
-		{	
-			if( inst instanceof DataGenCPInstruction )
-			{
-				DataGenCPInstruction linst = (DataGenCPInstruction) inst;
-				DataFormat df = (   MatrixBlock.evalSparseFormatInMemory(linst.getRows(), linst.getCols(), (long)(linst.getSparsity()*linst.getRows()*linst.getCols())) ? 
-						            DataFormat.SPARSE : DataFormat.DENSE ); 
-				ret = new OptNodeStatistics(linst.getRows(), linst.getCols(), -1, -1, linst.getSparsity(), df);
-			}
-			else if ( inst instanceof FunctionCallCPInstruction )
-			{
-				FunctionCallCPInstruction linst = (FunctionCallCPInstruction)inst;
-				ArrayList<String> params = linst.getBoundInputParamNames();
-				ret = new OptNodeStatistics(); //default vals
-				
-				double maxSize = 0;
-				for( String param : params ) //use the largest input matrix
-				{
-					Data dat = vars.get(param);
-					if( dat!=null && dat.getDataType()==DataType.MATRIX )
-					{
-						MatrixObject mdat1 = (MatrixObject) dat;
-						MatrixCharacteristics mc1 = mdat1.getMatrixCharacteristics();
-						
-						if( mc1.getRows()*mc1.getCols() > maxSize )
-						{
-							ret.setDim1( mc1.getRows() );
-							ret.setDim2( mc1.getCols() );
-							ret.setSparsity( OptimizerUtils.getSparsity(ret.getDim1(), ret.getDim2(), mc1.getNonZeros()) ); //sparsity
-							ret.setDataFormat( MatrixBlock.evalSparseFormatInMemory(mc1.getRows(), mc1.getCols(), mc1.getNonZeros()) ? 
-									            DataFormat.SPARSE : DataFormat.DENSE ); 
-							maxSize = mc1.getRows()*mc1.getCols();
-						}
-					}
-				}
-			}
-			else if ( inst instanceof ComputationCPInstruction ) //needs to be last CP case
-			{
-				//AggregateBinaryCPInstruction, AggregateUnaryCPInstruction, 
-				//FunctionCallCPInstruction, ReorgCPInstruction
-				
-				ComputationCPInstruction linst = (ComputationCPInstruction) inst;
-				ret = new OptNodeStatistics(); //default
-				
-				if( linst.input1 != null && linst.input2 != null ) //binary
-				{
-					Data dat1 = vars.get( linst.input1.getName() );
-					Data dat2 = vars.get( linst.input2.getName() );
-					
-					if( dat1 != null )
-					{
-						MatrixObject mdat1 = (MatrixObject) dat1;
-						MatrixCharacteristics mc1 = ((MatrixFormatMetaData)mdat1.getMetaData()).getMatrixCharacteristics();
-						ret.setDim1( mc1.getRows() );
-						ret.setDim2( mc1.getCols() );
-						ret.setSparsity( OptimizerUtils.getSparsity(ret.getDim1(), ret.getDim2(), mc1.getNonZeros()) ); //sparsity
-						ret.setDataFormat( MatrixBlock.evalSparseFormatInMemory(mc1.getRows(), mc1.getCols(), mc1.getNonZeros())? DataFormat.SPARSE : DataFormat.DENSE); 
-					}
-					if( dat2 != null )
-					{
-						MatrixObject mdat2 = (MatrixObject) dat2;
-						MatrixCharacteristics mc2 = ((MatrixFormatMetaData)mdat2.getMetaData()).getMatrixCharacteristics();
-						ret.setDim3( mc2.getRows() );
-						ret.setDim4( mc2.getCols() );
-						ret.setDataFormat( MatrixBlock.evalSparseFormatInMemory(mc2.getRows(), mc2.getCols(), mc2.getNonZeros()) ? DataFormat.SPARSE : DataFormat.DENSE ); 
-					}
-				}
-				else //unary
-				{
-					if( linst.input1 != null ) 
-					{
-						Data dat1 = vars.get( linst.input1.getName() );
-						if( dat1 != null ) {
-							MatrixObject mdat1 = (MatrixObject) dat1;
-							MatrixCharacteristics mc1 = ((MatrixFormatMetaData)mdat1.getMetaData()).getMatrixCharacteristics();
-							ret.setDim1( mc1.getRows() );
-							ret.setDim2( mc1.getCols() );
-							ret.setSparsity( OptimizerUtils.getSparsity(ret.getDim1(), ret.getDim2(), mc1.getNonZeros()) ); //sparsity
-							ret.setDataFormat(MatrixBlock.evalSparseFormatInMemory(mc1.getRows(), mc1.getCols(), mc1.getNonZeros()) ? DataFormat.SPARSE : DataFormat.DENSE); 
-						}
-					}
-				}
-			}
-		}
-		
-		if( ret == null )
-			ret = new OptNodeStatistics(); //default values
-		
-		return ret; //null if not reqistered for profiling
+		return new OptNodeStatistics(); //default values
 	}
 
 	public static void replaceProgramBlock(OptNode parent, OptNode n, ProgramBlock pbOld, ProgramBlock pbNew, boolean rtMap) 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingAbstract.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingAbstract.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingAbstract.java
index 6d80f64..865dc9d 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingAbstract.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingAbstract.java
@@ -29,13 +29,11 @@ import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.ProgramBlock;
 
 public class OptTreePlanMappingAbstract extends OptTreePlanMapping
-{
-	
+{	
 	private DMLProgram _prog;
 	private Program _rtprog;
 	private Map<Long, Object> _id_hlprog;
 	private Map<Long, Object> _id_rtprog;
-	private Map<Long, Object> _id_symb; // mapping for symbol table
 	
 	public OptTreePlanMappingAbstract( )
 	{
@@ -46,7 +44,6 @@ public class OptTreePlanMappingAbstract extends OptTreePlanMapping
 		
 		_id_hlprog = new HashMap<Long, Object>();
 		_id_rtprog = new HashMap<Long, Object>();
-		_id_symb = new HashMap<Long, Object>();
 	}
 	
 	public void putRootProgram( DMLProgram prog, Program rtprog )
@@ -61,7 +58,6 @@ public class OptTreePlanMappingAbstract extends OptTreePlanMapping
 		
 		_id_hlprog.put(id, hops);
 		_id_rtprog.put(id, null);
-		_id_symb.put(id, null);
 		_id_optnode.put(id, n);	
 		
 		n.setID(id);
@@ -75,7 +71,6 @@ public class OptTreePlanMappingAbstract extends OptTreePlanMapping
 		
 		_id_hlprog.put(id, sb);
 		_id_rtprog.put(id, pb);
-		_id_symb.put(id, null);
 		_id_optnode.put(id, n);
 		n.setID(id);
 		
@@ -97,10 +92,9 @@ public class OptTreePlanMappingAbstract extends OptTreePlanMapping
 	
 	public Object[] getMappedProg( long id )
 	{
-		Object[] ret = new Object[3];
+		Object[] ret = new Object[2];
 		ret[0] = (StatementBlock)_id_hlprog.get( id );
 		ret[1] = (ProgramBlock)_id_rtprog.get( id );
-		ret[2] = (ProgramBlock)_id_symb.get( id );
 		
 		return ret;
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingRuntime.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingRuntime.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingRuntime.java
index 0a7d9f6..8c131f3 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingRuntime.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreePlanMappingRuntime.java
@@ -26,8 +26,7 @@ import org.apache.sysml.runtime.controlprogram.ProgramBlock;
 import org.apache.sysml.runtime.instructions.Instruction;
 
 public class OptTreePlanMappingRuntime extends OptTreePlanMapping
-{
-	
+{	
 	private Map<Long, Object> _id_rtprog;
 
 	public OptTreePlanMappingRuntime()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizationWrapper.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
index a7064da..2469005 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizationWrapper.java
@@ -76,7 +76,6 @@ public class OptimizationWrapper
 	
 	//internal parameters
 	public static final double PAR_FACTOR_INFRASTRUCTURE = 1.0;
-	private static final boolean ALLOW_RUNTIME_COSTMODEL = false;
 	private static final boolean CHECK_PLAN_CORRECTNESS = false; 
 	
 	static
@@ -147,12 +146,6 @@ public class OptimizationWrapper
 		CostModelType cmtype = opt.getCostModelType();
 		LOG.trace("ParFOR Opt: Created optimizer ("+otype+","+opt.getPlanInputType()+","+opt.getCostModelType());
 		
-		if( cmtype == CostModelType.RUNTIME_METRICS  //TODO remove check when perftesttool supported
-			&& !ALLOW_RUNTIME_COSTMODEL )
-		{
-			throw new DMLRuntimeException("ParFOR Optimizer "+otype+" requires cost model "+cmtype+" that is not suported yet.");
-		}
-		
 		OptTree tree = null;
 		
 		//recompile parfor body 
@@ -243,7 +236,7 @@ public class OptimizationWrapper
 		}
 		
 		//create cost estimator
-		CostEstimator est = createCostEstimator( cmtype );
+		CostEstimator est = createCostEstimator( cmtype, ec.getVariables() );
 		LOG.trace("ParFOR Opt: Created cost estimator ("+cmtype+")");
 		
 		//core optimize
@@ -296,15 +289,6 @@ public class OptimizationWrapper
 			case CONSTRAINED:
 				opt = new OptimizerConstrained();
 				break;	
-		
-			//MB: removed unused and experimental prototypes
-			//case FULL_DP:
-			//	opt = new OptimizerDPEnum();
-			//	break;
-			//case GREEDY:
-			//	opt = new OptimizerGreedyEnum();
-			//	break;
-			
 			default:
 				throw new DMLRuntimeException("Undefined optimizer: '"+otype+"'.");
 		}
@@ -312,7 +296,7 @@ public class OptimizationWrapper
 		return opt;
 	}
 
-	private static CostEstimator createCostEstimator( CostModelType cmtype ) 
+	private static CostEstimator createCostEstimator( CostModelType cmtype, LocalVariableMap vars ) 
 		throws DMLRuntimeException
 	{
 		CostEstimator est = null;
@@ -320,10 +304,13 @@ public class OptimizationWrapper
 		switch( cmtype )
 		{
 			case STATIC_MEM_METRIC:
-				est = new CostEstimatorHops( OptTreeConverter.getAbstractPlanMapping() );
+				est = new CostEstimatorHops( 
+						OptTreeConverter.getAbstractPlanMapping() );
 				break;
 			case RUNTIME_METRICS:
-				est = new CostEstimatorRuntime();
+				est = new CostEstimatorRuntime( 
+						OptTreeConverter.getAbstractPlanMapping(), 
+						(LocalVariableMap)vars.clone() );
 				break;
 			default:
 				throw new DMLRuntimeException("Undefined cost model type: '"+cmtype+"'.");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/Optimizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/Optimizer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/Optimizer.java
index fdeffb8..af92b80 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/Optimizer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/Optimizer.java
@@ -39,8 +39,6 @@ import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
  */
 public abstract class Optimizer 
 {
-
-	
 	protected static final Log LOG = LogFactory.getLog(Optimizer.class.getName());
 	
 	protected long _numTotalPlans     = -1;
@@ -86,14 +84,11 @@ public abstract class Optimizer
 	///////
 	//methods for evaluating the overall properties and costing  
 
-	public long getNumTotalPlans()
-	{
+	public long getNumTotalPlans() {
 		return _numTotalPlans;
 	}
 
-	public long getNumEvaluatedPlans()
-	{
+	public long getNumEvaluatedPlans() {
 		return _numEvaluatedPlans;
 	}
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
index 9754e6f..39e742f 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
@@ -62,8 +62,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 {
 
 	@Override
-	public POptMode getOptMode()
-	{
+	public POptMode getOptMode() {
 		return POptMode.CONSTRAINED;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
index a53f7f1..88d7798 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
@@ -25,31 +25,22 @@ import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeas
 
 
 /**
- * Heuristic ParFor Optimizer (time: O(n)):
+ * Heuristic ParFor Optimizer: This optimizer extends the rule-based
+ * optimizer by a time-based cost estimate for execution type decisions.
  * 
  *  
  */
 public class OptimizerHeuristic extends OptimizerRuleBased
 {
-
-	
-	public static final double EXEC_TIME_THRESHOLD = 60000; //in ms
+	public static final double EXEC_TIME_THRESHOLD = 30000; //in ms
 			
 	@Override
-	public CostModelType getCostModelType() 
-	{
+	public CostModelType getCostModelType() {
 		return CostModelType.RUNTIME_METRICS;
 	}
-
-	@Override
-	public PlanInputType getPlanInputType() 
-	{
-		return PlanInputType.RUNTIME_PLAN;
-	}
 	
 	@Override
-	public POptMode getOptMode() 
-	{
+	public POptMode getOptMode() {
 		return POptMode.HEURISTIC;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
deleted file mode 100644
index 08c02ef..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.InternalTestVariable;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVariable;
-
-/**
- * Internal representation of a test configuration consisting of a logical TestMeasure,
- * a logical TestVariable as well as a DataFormat. Note that one cost function refers
- * to a statistical model of a profiling run for a combination of such a test configuration 
- * and an instruction.
- *
- */
-@Deprecated
-public class PerfTestDef 
-{
-	//logical properties
-	private TestMeasure    _measure;
-	private TestVariable   _lvariable;
-	private DataFormat     _dataformat;
-	
-	//physical properties
-	private InternalTestVariable[] _pvariables;
-	private double         _min;
-	private double         _max;
-	private double         _samples;
-	
-	public PerfTestDef( TestMeasure m, TestVariable lv, DataFormat df, InternalTestVariable pv, double min, double max, double samples )
-	{
-		this( m, lv, df, new InternalTestVariable[]{pv}, min, max, samples);
-	}
-	
-	public PerfTestDef( TestMeasure m, TestVariable lv, DataFormat df, InternalTestVariable[] pv, double min, double max, double samples )
-	{
-		_measure  = m;
-		_lvariable = lv;
-		_dataformat = df;
-		
-		_pvariables = pv;
-		_min = min;
-		_max = max;
-		_samples = samples;
-	}
-	
-	public TestMeasure getMeasure()
-	{
-		return _measure;
-	}
-	
-	public TestVariable getVariable()
-	{
-		return _lvariable;
-	}
-	
-	public DataFormat getDataformat()
-	{
-		return _dataformat;
-	}
-	
-	public InternalTestVariable[] getInternalVariables()
-	{
-		return _pvariables;
-	}
-	
-	public double getMin()
-	{
-		return _min;
-	}
-	
-	public double getMax()
-	{
-		return _max;
-	}
-	
-	public double getNumSamples()
-	{
-		return _samples;
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
deleted file mode 100644
index 5a1fac8..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * External function (type CP) used within the PerfTestTool in order to
- * measure the general behavior of package support.
- *
- */
-@Deprecated
-public class PerfTestExtFunctCP extends PackageFunction 
-{	
-	
-	private static final long   serialVersionUID = 1L;
-	private static final String OUTPUT_FILE      = "PerfTestExtFunctOutput";
-	
-	private static IDSequence   _idSeq   = new IDSequence(); 
-	private Matrix              _ret     = null; 
-	private String              _baseDir = null;
-	
-	@Override
-	public int getNumFunctionOutputs() 
-	{
-		return 1;
-	}
-
-	@Override
-	public FunctionParameter getFunctionOutput(int pos) 
-	{
-		return _ret;
-	}
-	
-	public void setBaseDir(String dir)
-	{
-		_baseDir = dir;
-	}
-
-	@Override
-	public void execute() 
-	{
-		try
-		{
-			long id = _idSeq.getNextID();
-			
-			Matrix in = (Matrix) this.getFunctionInput(0);
-			double [][] aIn = in.getMatrixAsDoubleArray();
-			
-			int rows = aIn.length;
-			int cols = aIn[0].length;
-			
-			String dir = _baseDir + "/" + OUTPUT_FILE+id;
-			
-			//copy and write output data 
-			MatrixBlock mb = new MatrixBlock(rows,cols,false);
-			for(int i=0; i < rows; i++)
-				for(int j=0; j < cols; j++)
-					mb.setValue(i, j, aIn[i][j]);
-
-			_ret = new Matrix(dir, rows, cols, ValueType.Double);
-			_ret.setMatrixDoubleArray(mb, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);	
-		}
-		catch(Exception e)
-		{
-			throw new RuntimeException("Error executing generic test extfunct.", e);
-		}
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
deleted file mode 100644
index 0698cbe..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import java.lang.ref.WeakReference;
-
-/**
- * Observer thread for asynchronously monitor the memory consumption.
- * It periodically measures the used memory with period <code>MEASURE_INTERVAL</code>,
- * by explicitly invoking the garbage collector (if required until it is really executed)
- * and afterwards obtaining the currently used memory.
- * 
- * Protocol: (1) measure start, (2) start thread, (3) *do some work*, (4) join thread, (5) get max memory.
- *  
- */
-@Deprecated
-public class PerfTestMemoryObserver implements Runnable
-{
-	
-	public static final int MEASURE_INTERVAL = 50; //in ms 
-	
-	private long    _startMem = -1;
-	private long    _maxMem   = -1; 
-	private boolean _stopped  = false;
-	
-	public PerfTestMemoryObserver()
-	{
-		_startMem = -1;
-		_maxMem   = -1; 
-		_stopped  = false;			
-	}
-
-	public void measureStartMem()
-	{
-		forceGC();
-		_startMem =  Runtime.getRuntime().totalMemory()
-		           - Runtime.getRuntime().freeMemory();
-	}
-
-	public long getMaxMemConsumption()
-	{
-		long val = _maxMem - _startMem;
-		return (val < 0) ? 0 : val; 
-	}
-
-	public void setStopped()
-	{
-		_stopped = true;
-	}
-
-	@Override
-	public void run() 
-	{
-		try
-		{
-			while( !_stopped )
-			{
-				forceGC();
-				long value =   Runtime.getRuntime().totalMemory()
-		                     - Runtime.getRuntime().freeMemory(); 
-				
-				_maxMem = Math.max(value, _maxMem);
-				
-				Thread.sleep( MEASURE_INTERVAL );
-			}
-		}
-		catch(Exception ex)
-		{
-			throw new RuntimeException("Error measuring Java memory usage", ex);
-		}
-	}
-
-	public static double getUsedMemory()
-	{
-		forceGC();
-		return  ( Runtime.getRuntime().totalMemory()
-		           - Runtime.getRuntime().freeMemory() );
-	}
-
-	private static void forceGC()
-	{
-		//request gc until weak reference is eliminated by gc
-		Object o = new Object();
-		WeakReference<Object> ref = new WeakReference<Object>(o); //collected, everytime gc is actually invoked
-		while((o=ref.get())!= null) 
-			System.gc(); //called on purpose, no production use.
-	}
-}
\ No newline at end of file



[4/6] incubator-systemml git commit: [SYSTEMML-1302] Remove parfor perftesttool, cleanup heuristic optimizer

Posted by mb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
deleted file mode 100644
index c130031..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
+++ /dev/null
@@ -1,1411 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.StringTokenizer;
-
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLOutputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-import javax.xml.stream.XMLStreamWriter;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLException;
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.MMTSJ.MMTSJType;
-import org.apache.sysml.parser.DMLProgram;
-import org.apache.sysml.parser.DataIdentifier;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.parser.ExternalFunctionStatement;
-import org.apache.sysml.parser.ParseException;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.Program;
-import org.apache.sysml.runtime.controlprogram.ProgramBlock;
-import org.apache.sysml.runtime.controlprogram.caching.CacheException;
-import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
-import org.apache.sysml.runtime.instructions.CPInstructionParser;
-import org.apache.sysml.runtime.instructions.Instruction;
-import org.apache.sysml.runtime.instructions.MRJobInstruction;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
-import org.apache.sysml.runtime.io.IOUtilFunctions;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-import au.com.bytecode.opencsv.CSVReader;
-import au.com.bytecode.opencsv.CSVWriter;
-
-/**
- * DML Instructions Performance Test Tool: 
- * 
- * Creates an offline performance profile (required once per installation) of DML instructions.
- * The profile is a combination of all individual statistical models trained per combination of 
- * instruction and test configuration. In order to train those models, we execute and measure
- * real executions of DML instructions on random input data. Finally, during runtime, the profile
- * is used by the costs estimator in order to create statistic estimates for cost-based optimization.
- * 
- * 
- */
-@Deprecated
-public class PerfTestTool 
-{
-	
-	//public parameters (used for estimation)
-	public static final long    MIN_DATASIZE           = 1000;
-	public static final long    MAX_DATASIZE           = 1000000; 
-	public static final long    DEFAULT_DATASIZE       = 500000;//(MAX_DATASIZE-MIN_DATASIZE)/2;
-	public static final long    DATASIZE_MR_SCALE      = 20;
-	public static final double  MIN_SPARSITY           = 0.1;
-	public static final double  MAX_SPARSITY           = 1.0;
-	public static final double  DEFAULT_SPARSITY       = 0.5;//(MAX_SPARSITY-MIN_SPARSITY)/2;
-	
-	//internal parameters
-	private static final boolean READ_STATS_ON_STARTUP  = false;
-	private static final int     TEST_REPETITIONS       = 10; 
-	private static final int     NUM_SAMPLES_PER_TEST   = 11; 
-	private static final int     MODEL_MAX_ORDER        = 2;
-	private static final boolean MODEL_INTERCEPT        = true;
-	
-	private static final String  PERF_TOOL_DIR          = "./conf/PerfTestTool/";
-//	private static final String  PERF_RESULTS_FNAME     = PERF_TOOL_DIR + "%id%.dat";
-	private static final String  PERF_PROFILE_FNAME     = PERF_TOOL_DIR + "performance_profile.xml";
-	private static final String  DML_SCRIPT_FNAME       = "./src/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml";
-	private static final String  DML_TMP_FNAME          = PERF_TOOL_DIR + "temp.dml";
-	
-	//XML profile tags and attributes
-	private static final String  XML_PROFILE            = "profile";
-	private static final String  XML_DATE               = "date";
-	private static final String  XML_INSTRUCTION        = "instruction";
-	private static final String  XML_ID                 = "id";
-	private static final String  XML_NAME               = "name";
-	private static final String  XML_COSTFUNCTION       = "cost_function";
-	private static final String  XML_MEASURE            = "measure";
-	private static final String  XML_VARIABLE           = "lvariable";
-	private static final String  XML_INTERNAL_VARIABLES = "pvariables";
-	private static final String  XML_DATAFORMAT         = "dataformat";
-	private static final String  XML_ELEMENT_DELIMITER  = "\u002c"; //","; 
-		
-	//ID sequences for instructions and test definitions
-	private static IDSequence _seqInst     = null;
-	private static IDSequence _seqTestDef  = null;
-	
-	//registered instructions and test definitions
-	private static HashMap<Integer, PerfTestDef>   _regTestDef        = null; 
-	private static HashMap<Integer, Instruction>   _regInst           = null;
-	private static HashMap<Integer, String>        _regInst_IDNames   = null;
-	private static HashMap<String, Integer>        _regInst_NamesID   = null;
-	private static HashMap<Integer, Integer[]>     _regInst_IDTestDef = null; 
-	private static HashMap<Integer, Boolean>       _regInst_IDVectors = null;
-	private static HashMap<Integer, IOSchema>      _regInst_IDIOSchema = null;
-	
-	protected static final Log LOG = LogFactory.getLog(PerfTestTool.class.getName());
-	
-	
-	private static Integer[] _defaultConf  = null;
-//	private static Integer[] _MRConf  = null;
-	
-	//raw measurement data (instID, physical defID, results)
-	private static HashMap<Integer,HashMap<Integer,LinkedList<Double>>> _results = null;
-		
-	//profile data 
-	private static boolean    _flagReadData = false; 
-	private static HashMap<Integer,HashMap<Integer,CostFunction>> _profile = null;
-	
-	public enum TestVariable //logical test variable
-	{
-		DATA_SIZE,
-		SPARSITY,
-		PARALLELISM,
-		
-		//some mr specific conf properites
-		SORT_IO_MEM
-	}
-	
-	public enum InternalTestVariable //physical test variable
-	{
-		DATA_SIZE,
-		DIM1_SIZE,
-		DIM2_SIZE,
-		DIM3_SIZE,
-		SPARSITY,
-		SORT_IO_MEM
-	}
-	
-	public enum IOSchema
-	{
-		NONE_NONE,
-		NONE_UNARY,
-		UNARY_UNARY,
-		BINARY_NONE,
-		BINARY_UNARY
-	}
-	
-	public enum TestConstants //logical test constants
-	{
-		DFS_READ_THROUGHPUT,
-		DFS_WRITE_THROUGHPUT,
-		LFS_READ_THROUGHPUT,
-		LFS_WRITE_THROUGHPUT
-	}
-	
-	static
-	{
-		//init repository
-		_seqInst      = new IDSequence();
-		_seqTestDef   = new IDSequence();		
-		_regTestDef   = new HashMap<Integer, PerfTestDef>();
-		_regInst      = new HashMap<Integer, Instruction>();
-		_regInst_IDNames = new HashMap<Integer, String>();
-		_regInst_NamesID = new HashMap<String, Integer>();		
-		_regInst_IDTestDef = new HashMap<Integer, Integer[]>();
-		_regInst_IDVectors = new HashMap<Integer, Boolean>();
-		_regInst_IDIOSchema = new HashMap<Integer, IOSchema>();
-		_results      = new HashMap<Integer, HashMap<Integer,LinkedList<Double>>>();
-		_profile      = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-		_flagReadData = false;
-		
-		//load existing profile if required
-		try
-		{
-			if( READ_STATS_ON_STARTUP )
-				readProfile( PERF_PROFILE_FNAME );
-		}
-		catch(Exception ex)
-		{
-			throw new RuntimeException(ex);
-		}
-	}
-
-	public static void lazyInit() 
-		throws DMLRuntimeException
-	{
-		//read profile for first access
-		if( !_flagReadData )
-		{
-			try
-			{
-				//register all testdefs and instructions
-				registerTestConfigurations();
-				registerInstructions();
-				
-				//read profile
-				readProfile( PERF_PROFILE_FNAME );
-			}
-			catch(Exception ex)
-			{
-				throw new DMLRuntimeException(ex);
-			}	
-		}
-		
-		if( _profile == null )
-			throw new DMLRuntimeException("Performance test results have not been loaded completely.");
-	}
-
-	public static boolean isRegisteredInstruction(String opStr)
-		throws DMLRuntimeException 
-	{
-		//init if required
-		lazyInit();
-		
-		//determine if inst registered
-		return _regInst_NamesID.containsKey(opStr);
-	}
-
-	public static CostFunction getCostFunction( String instName, TestMeasure measure, TestVariable variable, DataFormat dataformat )
-		throws DMLRuntimeException
-	{		
-		//init if required
-		lazyInit();
-		
-		CostFunction tmp = null;
-		int instID = getInstructionID( instName );
-		if( instID != -1 ) //existing profile
-		{
-			int tdefID = getMappedTestDefID(instID, measure, variable, dataformat);		
-			tmp = _profile.get(instID).get(tdefID);
-		}
-		return tmp;
-	}
-
-	@SuppressWarnings("all")
-	public static boolean runTest()
-	{
-		boolean ret = false;
-	
-		try
-		{
-			Timing time = new Timing();
-			time.start();
-			
-			//init caching
-			LazyWriteBuffer.init();
-			
-			//register all testdefs and instructions
-			registerTestConfigurations();
-			registerInstructions();
-			
-			//execute tests for all confs and all instructions
-			executeTest();
-			
-			//compute regression models
-			int rows = NUM_SAMPLES_PER_TEST;
-			int cols = MODEL_MAX_ORDER + (MODEL_INTERCEPT ? 1 : 0);
-			HashMap<Integer,Long> tmp = writeResults( PERF_TOOL_DIR );
-			computeRegressionModels( DML_SCRIPT_FNAME, DML_TMP_FNAME, PERF_TOOL_DIR, tmp.size(), rows, cols);
-			readRegressionModels( PERF_TOOL_DIR, tmp);
-			
-			//execConstantRuntimeTest();
-			//execConstantMemoryTest();
-		
-			//write final profile to XML file
-			writeProfile(PERF_TOOL_DIR, PERF_PROFILE_FNAME);
-			System.out.format("SystemML PERFORMANCE TEST TOOL: finished profiling (in %.2f min), profile written to "+PERF_PROFILE_FNAME+"%n", time.stop()/60000);
-			
-			ret = true;
-		}
-		catch(Exception ex)
-		{
-			LOG.error("Failed to run performance test.", ex);
-		}
-		
-		return ret;
-	}
-
-	private static void registerTestConfigurations()
-	{
-		//reset ID Sequence for consistent IDs
-		_seqTestDef.reset();
-		
-		//register default testdefs //TODO
-		TestMeasure[] M = new TestMeasure[]{ TestMeasure.EXEC_TIME/*, TestMeasure.MEMORY_USAGE*/ };
-		DataFormat[] D =  new DataFormat[]{DataFormat.DENSE/*,DataFormat.SPARSE*/};
-		Integer[] defaultConf = new Integer[M.length*D.length*2];		
-		int i=0;
-		for( TestMeasure m : M ) //for all measures
-			for( DataFormat d : D ) //for all data formats
-			{
-				defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.DATA_SIZE, d, InternalTestVariable.DATA_SIZE,
-                        MIN_DATASIZE, MAX_DATASIZE, NUM_SAMPLES_PER_TEST ) );
-				defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.SPARSITY, d, InternalTestVariable.SPARSITY,
-						MIN_SPARSITY, MAX_SPARSITY, NUM_SAMPLES_PER_TEST ) );
-			}
-		
-
-		//register advanced (multi-dim) test defs
-		//FIXME enable
-		/*for( TestMeasure m : M ) //for all measures
-			for( DataFormat d : D ) //for all data formats
-			{
-				registerTestDef( new PerfTestDef( m, TestVariable.DATA_SIZE, d,
-                        new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}, 
-                        MIN_DIMSIZE, MAX_DIMSIZE, NUM_SAMPLES_PER_TEST ) );
-			}?*
-
-			
-		//register MR specific instructions FIXME: just for test
-		/*Integer[] mrConf = new Integer[D.length];
-		i = 0;
-		for( DataFormat d : D )
-		{
-			mrConf[i++] = registerTestDef( new PerfTestDef(TestMeasure.EXEC_TIME, TestVariable.SORT_IO_MEM, d,
-					                         InternalTestVariable.SORT_IO_MEM,
-				                             MIN_SORT_IO_MEM, MAX_SORT_IO_MEM, NUM_SAMPLES_PER_TEST ) );
-		}*/
-		
-		//set default testdefs
-		_defaultConf = defaultConf;
-		//_MRConf = mrConf;
-	}
-
-	private static void registerInstructions() 
-		throws DMLRuntimeException
-	{
-		//reset ID sequences for consistent IDs
-		_seqInst.reset();
-		
-		///////
-		// CP instructions
-		
-		//matrix multiply mmtsj
-		registerInstruction( "CP"+Lop.OPERAND_DELIMITOR+"tsmm", CPInstructionParser.parseSingleInstruction("CP"+Lop.OPERAND_DELIMITOR+"tsmm"+Lop.OPERAND_DELIMITOR+"A"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+"C"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+MMTSJType.LEFT),
-						     getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); 
-		
-		/*
-		//matrix multiply 
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-						     getDefaultTestDefs(), false, IOSchema.BINARY_UNARY ); 
-		////registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-		////		             changeToMuliDimTestDefs(TestVariable.DATA_SIZE, getDefaultTestDefs()) ); 
-		//rand
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"Rand", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"Rand"+Lops.OPERAND_DELIMITOR+"rows=1"+Lops.OPERAND_DELIMITOR+"cols=1"+Lops.OPERAND_DELIMITOR+"rowsInBlock=1000"+Lops.OPERAND_DELIMITOR+"colsInBlock=1000"+Lops.OPERAND_DELIMITOR+"min=1.0"+Lops.OPERAND_DELIMITOR+"max=100.0"+Lops.OPERAND_DELIMITOR+"sparsity=1.0"+Lops.OPERAND_DELIMITOR+"seed=7"+Lops.OPERAND_DELIMITOR+"pdf=uniform"+Lops.OPERAND_DELIMITOR+"dir=."+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-				 			 getDefaultTestDefs(), false, IOSchema.NONE_UNARY );
-		//matrix transpose
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"r'", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"r'"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-	 			 			 getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
-		//sum
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"uak+", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"uak+"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), //needs B instead of C
-	 			             getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
-		//external function
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"extfunct", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"extfunct"+Lops.OPERAND_DELIMITOR+DMLProgram.DEFAULT_NAMESPACE+""+Lops.OPERAND_DELIMITOR+"execPerfTestExtFunct"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"A"+Lops.OPERAND_DELIMITOR+"C"),
-	                         getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );		
-		//central moment
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cm", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cm"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"2"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"INT"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-	            			 getDefaultTestDefs(), true, IOSchema.UNARY_NONE ); 
-		//co-variance
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cov", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cov"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-     						 getDefaultTestDefs(), true, IOSchema.BINARY_NONE );
-		*/
-		
-		/*
-		///////
-		// MR instructions
-		registerInstruction( "jobtypeMMRJ", createMRJobInstruction(JobType.MMRJ,
-							                    MRInstructionParser.parseSingleInstruction("MR"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "rmm"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "0"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "1"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "2"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE ")),
-							 _MRConf, false, IOSchema.BINARY_UNARY ); 		
-
-		*/
-		/*ADD ADDITIONAL INSTRUCTIONS HERE*/
-		
-		
-		
-		//extend list to all (expensive) instructions; maybe also: createvar, assignvar, cpvar, rm, mv, setfilename, rmfilevar
-		
-	}
-	
-/*
-	private static Instruction createMRJobInstruction(JobType type, MRInstruction inst) 
-	{
-		MRJobInstruction mrinst = new MRJobInstruction(type);
-		
-		if( type == JobType.MMRJ )
-		{
-			ArrayList<String> inLab = new ArrayList<String>();
-			ArrayList<String> outLab = new ArrayList<String>();
-			inLab.add("A");
-			inLab.add("B");
-			outLab.add("C");
-			
-			mrinst.setMMRJInstructions(new String[]{"A","B"}, 
-									   "", 
-									   inst.toString(), 
-									   "", 
-									   "", 
-									   new String[]{"C"},
-									   new byte[]{2},
-									   10, 1 );
-			
-		}
-		
-		
-		return mrinst;
-	}
-*/
-
-	private static int registerTestDef( PerfTestDef def )
-	{
-		int ID = (int)_seqTestDef.getNextID();
-		
-		_regTestDef.put( ID, def );
-		
-		return ID;
-	}
-
-	private static void registerInstruction( String iname, Instruction inst, Integer[] testDefIDs, boolean vectors, IOSchema schema )
-	{
-		int ID = (int)_seqInst.getNextID();
-		registerInstruction(ID, iname, inst, testDefIDs, vectors, schema);
-	}
-
-	private static void registerInstruction( int ID, String iname, Instruction inst, Integer[] testDefIDs, boolean vector, IOSchema schema )
-	{
-		_regInst.put( ID, inst );
-		_regInst_IDNames.put( ID, iname );
-		_regInst_NamesID.put( iname, ID );
-		_regInst_IDTestDef.put( ID, testDefIDs );
-		_regInst_IDVectors.put( ID, vector );
-		_regInst_IDIOSchema.put( ID, schema );
-	}
-
-	private static int getMappedTestDefID( int instID, TestMeasure measure, TestVariable variable, DataFormat dataformat )
-	{
-		int ret = -1;
-		
-		for( Integer defID : _regInst_IDTestDef.get(instID) )
-		{
-			PerfTestDef def = _regTestDef.get(defID);
-			if(   def.getMeasure()==measure 
-				&& def.getVariable()==variable 
-				&& def.getDataformat()==dataformat )
-			{
-				ret = defID;
-				break;
-			}
-		}
-		
-		return ret;
-	}
-
-	@SuppressWarnings("unused")
-	private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable pvariable )
-	{
-		return getTestDefID(measure, lvariable, dataformat, new InternalTestVariable[]{pvariable});
-	}
-
-	private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable[] pvariables )
-	{
-		int ret = -1;
-		
-		for( Entry<Integer,PerfTestDef> e : _regTestDef.entrySet() )
-		{
-			PerfTestDef def = e.getValue();
-			TestMeasure tmp1 = def.getMeasure();
-			TestVariable tmp2 = def.getVariable();
-			DataFormat tmp3 = def.getDataformat();
-			InternalTestVariable[] tmp4 = def.getInternalVariables();
-			
-			if( tmp1==measure && tmp2==lvariable && tmp3==dataformat )
-			{
-				boolean flag = true;
-				for( int i=0; i<tmp4.length; i++ )
-					flag &= ( tmp4[i] == pvariables[i] );	
-				
-				if( flag )
-				{
-					ret = e.getKey();
-					break;
-				}
-			}
-		}
-
-		return ret;
-	}
-
-	private static int getInstructionID( String instName )
-	{
-		Integer ret = _regInst_NamesID.get( instName );
-		return ( ret!=null )? ret : -1;
-	}
-
-	@SuppressWarnings("unused")
-	private static Integer[] getAllTestDefs()
-	{
-		return _regTestDef.keySet().toArray(new Integer[0]);
-	}
-
-	private static Integer[] getDefaultTestDefs()
-	{
-		return _defaultConf;
-	}
-
-	@SuppressWarnings("unused")
-	private static Integer[] changeToMuliDimTestDefs( TestVariable v, Integer[] IDs )
-	{
-		Integer[] tmp = new Integer[IDs.length];
-		
-		for( int i=0; i<tmp.length; i++ )
-		{
-			PerfTestDef def = _regTestDef.get(IDs[i]);
-			if( def.getVariable() == v ) //filter logical variables
-			{
-				//find multidim version
-				InternalTestVariable[] in = null;
-				switch( v )
-				{
-					case DATA_SIZE: 
-						in = new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}; 
-						break;
-					default:
-						//do nothing
-				}
-				
-				int newid = getTestDefID(def.getMeasure(), def.getVariable(), def.getDataformat(), in );
-				
-				//exchange testdef ID
-				tmp[i] = newid;
-			}
-			else
-			{
-				tmp[i] = IDs[i];
-			}
-		}
-		
-		return tmp;
-	}
-
-	private static void executeTest( ) 
-		throws DMLRuntimeException, IOException
-	{
-		System.out.println("SystemML PERFORMANCE TEST TOOL:");
-		
-		//foreach registered instruction	
-		for( Entry<Integer,Instruction> inst : _regInst.entrySet() )
-		{
-			int instID = inst.getKey();
-			System.out.println( "Running INSTRUCTION "+_regInst_IDNames.get(instID) );
-		
-			Integer[] testDefIDs = _regInst_IDTestDef.get(instID);
-			boolean vectors = _regInst_IDVectors.get(instID);
-			IOSchema schema = _regInst_IDIOSchema.get(instID);
-			
-			//create tmp program block and set instruction
-			Program prog = new Program();
-			ProgramBlock pb = new ProgramBlock( prog );
-			ArrayList<Instruction> ainst = new ArrayList<Instruction>();
-			ainst.add( inst.getValue() );
-			pb.setInstructions(ainst);
-			
-			ExecutionContext ec = ExecutionContextFactory.createContext();
-			
-			//foreach registered test configuration
-			for( Integer defID : testDefIDs )
-			{
-				PerfTestDef def = _regTestDef.get(defID);
-				TestMeasure m = def.getMeasure();
-				TestVariable lv = def.getVariable();
-				DataFormat df = def.getDataformat();
-				InternalTestVariable[] pv = def.getInternalVariables();
-				double min = def.getMin();
-				double max = def.getMax();
-				double samples = def.getNumSamples();
-				
-				System.out.println( "Running TESTDEF(measure="+m+", variable="+String.valueOf(lv)+" "+pv.length+", format="+String.valueOf(df)+")" );
-				
-				//vary input variable
-				LinkedList<Double> dmeasure = new LinkedList<Double>();
-				LinkedList<Double> dvariable = generateSequence(min, max, samples);					
-				int plen = pv.length;
-				
-				if( plen == 1 ) //1D function 
-				{
-					for( Double var : dvariable )
-					{
-						dmeasure.add(executeTestCase1D(m, pv[0], df, var, pb, vectors, schema, ec));
-					}
-				}
-				else //multi-dim function
-				{
-					//init index stack
-					int[] index = new int[plen];
-					for( int i=0; i<plen; i++ )
-						index[i] = 0;
-					
-					//execute test 
-					int dlen = dvariable.size();
-					double[] buff = new double[plen];
-					while( index[0]<dlen )
-					{
-						//set buffer values
-						for( int i=0; i<plen; i++ )
-							buff[i] = dvariable.get(index[i]);
-						
-						//core execution
-						dmeasure.add(executeTestCaseMD(m, pv, df, buff, pb, schema, ec)); //not applicable for vector flag
-						
-						//increment indexes
-						for( int i=plen-1; i>=0; i-- )
-						{
-							if(i==plen-1)
-								index[i]++;
-							else if( index[i+1] >= dlen )
-							{
-								index[i]++;
-								index[i+1]=0;
-							}
-						}
-					}
-				}
-				
-								
-				//append values to results
-				if( !_results.containsKey(instID) )
-					_results.put(instID, new HashMap<Integer, LinkedList<Double>>());
-				_results.get(instID).put(defID, dmeasure);
-	
-			}
-		}
-	}
-
-	private static double executeTestCase1D( TestMeasure m, InternalTestVariable v, DataFormat df, double varValue, ProgramBlock pb, boolean vectors, IOSchema schema, ExecutionContext ec ) 
-		throws DMLRuntimeException, IOException
-	{
-		double datasize = -1;
-		double dim1 = -1, dim2 = -1;
-		double sparsity = -1;
-		//double sortio = -1;
-		
-		System.out.println( "VAR VALUE "+varValue );
-	
-		//set test variables
-		switch ( v )
-		{
-			case DATA_SIZE:
-				datasize = varValue;
-				sparsity = DEFAULT_SPARSITY;
-				break;
-			case SPARSITY:
-				datasize = DEFAULT_DATASIZE;
-				sparsity = varValue;
-				break;
-			case SORT_IO_MEM: //FIXME
-				datasize = DEFAULT_DATASIZE * DATASIZE_MR_SCALE;
-				sparsity = DEFAULT_SPARSITY;
-				//sortio = varValue;
-				break;	
-			default:
-				//do nothing
-		}
-		
-		//set specific dimensions
-		if( vectors )
-		{
-			dim1 = datasize;
-			dim2 = 1;
-		}
-		else
-		{
-			dim1 = Math.sqrt( datasize );
-			dim2 = dim1;
-		}
-		
-		//instruction-specific configurations
-		Instruction inst = pb.getInstruction(0); //always exactly one instruction
-		if( inst instanceof DataGenCPInstruction )
-		{
-			DataGenCPInstruction rand = (DataGenCPInstruction) inst;
-			rand.setRows((long)dim1);
-			rand.setCols((long)dim2);
-			rand.setSparsity(sparsity);
-		}
-		else if ( inst instanceof FunctionCallCPInstruction ) //ExternalFunctionInvocationInstruction
-		{
-			Program prog = pb.getProgram();
-			ArrayList<DataIdentifier> in = new ArrayList<DataIdentifier>();
-			DataIdentifier dat1 = new DataIdentifier("A");
-			dat1.setDataType(DataType.MATRIX);
-			dat1.setValueType(ValueType.DOUBLE);
-			in.add(dat1);
-			ArrayList<DataIdentifier> out = new ArrayList<DataIdentifier>();
-			DataIdentifier dat2 = new DataIdentifier("C");
-			dat2.setDataType(DataType.MATRIX);
-			dat2.setValueType(ValueType.DOUBLE);
-			out.add(dat2);
-			HashMap<String, String> params = new HashMap<String, String>();
-			params.put(ExternalFunctionStatement.CLASS_NAME, PerfTestExtFunctCP.class.getName());			
-			ExternalFunctionProgramBlockCP fpb = new ExternalFunctionProgramBlockCP(prog, in, out, params, PERF_TOOL_DIR);	
-			prog.addFunctionProgramBlock(DMLProgram.DEFAULT_NAMESPACE, "execPerfTestExtFunct", fpb);
-		}
-		else if ( inst instanceof MRJobInstruction )
-		{
-			//FIXME hardcoded for test
-			//MMRJMR.SORT_IO_MEM = sortio;
-		}
-		
-		//generate input and output matrices
-		LocalVariableMap vars = ec.getVariables();
-		vars.removeAll();
-		double mem1 = PerfTestMemoryObserver.getUsedMemory();
-		if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
-			vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
-			vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
-			vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim2, df));
-		double mem2 = PerfTestMemoryObserver.getUsedMemory();
-		
-		//foreach repetition
-		double value = 0;
-		for( int i=0; i<TEST_REPETITIONS; i++ )
-		{
-			System.out.println("run "+i);
-			value += executeGenericProgramBlock( m, pb, ec );
-		}
-		value/=TEST_REPETITIONS;
-		
-		//result correction and print result
-		switch( m )
-		{
-			case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
-			case MEMORY_USAGE: 
-				//System.out.println("--- RESULT: "+value+" byte"); 
-				if( (mem2-mem1) > 0 )
-					value = value + mem2-mem1; //correction: input sizes added
-				System.out.println("--- RESULT: "+value+" byte"); break;
-			default: System.out.println("--- RESULT: "+value); break;
-		}
-		
-		return value;
-	}
-
-	private static double executeTestCaseMD( TestMeasure m, InternalTestVariable[] v, DataFormat df, double[] varValue, ProgramBlock pb, IOSchema schema, ExecutionContext ec ) 
-		throws DMLRuntimeException, IOException
-	{
-		//double datasize = DEFAULT_DATASIZE;
-		double sparsity = DEFAULT_SPARSITY;
-		double dim1 = -1;
-		double dim2 = -1;
-		double dim3 = -1;
-
-		
-		for( int i=0; i<v.length; i++ )
-		{
-			System.out.println( "VAR VALUE "+varValue[i] );
-				
-			switch( v[i] )
-			{
-				case DIM1_SIZE: dim1=varValue[i]; break;
-				case DIM2_SIZE: dim2=varValue[i]; break;
-				case DIM3_SIZE: dim3=varValue[i]; break;
-				default: //do nothing
-			}
-		}
-		
-		//generate input and output matrices
-		LocalVariableMap vars = ec.getVariables();
-		vars.removeAll();
-		double mem1 = PerfTestMemoryObserver.getUsedMemory();
-		if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
-			 vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
-			 vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim2, dim3, sparsity, df));
-		if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
-			vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim3, df));
-		double mem2 = PerfTestMemoryObserver.getUsedMemory();
-		
-		//foreach repetition
-		double value = 0;
-		for( int i=0; i<TEST_REPETITIONS; i++ )
-		{
-			System.out.println("run "+i);
-			value += executeGenericProgramBlock( m, pb, ec );
-		}
-		value/=TEST_REPETITIONS;
-		
-		//result correction and print result
-		switch( m )
-		{
-			case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
-			case MEMORY_USAGE: 
-				//System.out.println("--- RESULT: "+value+" byte"); 
-				if( (mem2-mem1) > 0 )
-					value = value + mem2-mem1; //correction: input sizes added
-				System.out.println("--- RESULT: "+value+" byte"); break;
-			default: System.out.println("--- RESULT: "+value); break;
-		}
-		
-		return value;
-	}
-
-	public static double executeGenericProgramBlock( TestMeasure measure, ProgramBlock pb, ExecutionContext ec ) 
-		throws DMLRuntimeException
-	{
-		double value = 0;
-		try
-		{
-			switch( measure )
-			{
-			 	case EXEC_TIME: 
-			 		Timing time = new Timing(); 
-			 		time.start();
-			 		pb.execute( ec );
-			 		value = time.stop();
-			 		break;
-			 	case MEMORY_USAGE:
-			 		PerfTestMemoryObserver mo = new PerfTestMemoryObserver();
-			 		mo.measureStartMem();
-			 		Thread t = new Thread(mo);
-			 		t.start();
-			 		pb.execute( ec );
-			 		mo.setStopped();
-			 		value = mo.getMaxMemConsumption();
-			 		t.join();
-			 		break;
-			}
-		}
-		catch(Exception ex)
-		{
-			throw new DMLRuntimeException(ex);
-		}
-		
-		//clear matrixes from cache
-		for( String str : ec.getVariables().keySet() )
-		{
-			Data dat = ec.getVariable(str); 
-			if( dat instanceof MatrixObject )
-				((MatrixObject)dat).clearData();		
-		}
-		
-		return value;
-	}
-
-	public static LinkedList<Double> generateSequence( double min, double max, double num )
-	{
-		LinkedList<Double> data = new LinkedList<Double>();
-		double increment = (max-min)/(num-1);
-		
-		for( int i=0; i<num; i++ )
-			data.add( Double.valueOf(min+i*increment) );
-		
-		return data;
-	}
-
-	public static MatrixObject generateInputDataset(String fname, double dim1, double dim2, double sparsity, DataFormat df) 
-		throws IOException, CacheException
-	{		
-		int d1 = (int) dim1;
-		int d2 = (int) dim2;
-		
-		System.out.println(d1+" "+d2);
-		
-		//create random test data
-		double[][] d = generateTestMatrix(d1, d2, 1, 100, sparsity, 7);
-		
-		//create matrix block
-		MatrixBlock mb = null;
-		switch( df ) 
-		{
-			case DENSE:
-				mb = new MatrixBlock(d1,d2,false);
-				break;
-			case SPARSE:
-				mb = new MatrixBlock(d1,d2,true, (int)(sparsity*dim1*dim2));
-				break;
-		}
-		
-		//insert data
-		for(int i=0; i < d1; i++)
-			for(int j=0; j < d2; j++)
-				if( d[i][j]!=0 )
-					mb.setValue(i, j, d[i][j]);		
-		
-		MapReduceTool.deleteFileIfExistOnHDFS(fname);
-		
-		MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
-		MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md);
-		mo.acquireModify(mb);
-		mo.release();
-		mo.exportData(); //write to HDFS
-		
-		return mo;
-	}
-
-	public static MatrixObject generateEmptyResult(String fname, double dim1, double dim2, DataFormat df ) 
-		throws IOException, CacheException
-	{
-		int d1 = (int)dim1;
-		int d2 = (int)dim2;
-		
-		/*
-		MatrixBlock mb = null;
-		switch( df ) 
-		{
-			case DENSE:
-				mb = new MatrixBlock(dim,dim,false);
-				break;
-			case SPARSE:
-				mb = new MatrixBlock(dim,dim,true);
-				break;
-		}*/
-		
-		MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
-		MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md);
-		
-		return mo;
-	}
-	
-
-	/**
-	 * NOTE: This is a copy of TestUtils.generateTestMatrix, it was replicated in order to prevent
-	 * dependency of SystemML.jar to our test package.
-	 * 
-	 * @param rows number of rows
-	 * @param cols number of columns
-	 * @param min minimum value
-	 * @param max maximum value
-	 * @param sparsity sparsity as a percentage
-	 * @param seed random seed value (-1 if use System time)
-	 * @return matrix as 2D double array
-	 */
-	public static double[][] generateTestMatrix(int rows, int cols, double min, double max, double sparsity, long seed) {
-		double[][] matrix = new double[rows][cols];
-		Random random;
-		if (seed == -1)
-			random = new Random(System.nanoTime());
-		else
-			random = new Random(seed);
-
-		for (int i = 0; i < rows; i++) {
-			for (int j = 0; j < cols; j++) {
-				if (random.nextDouble() > sparsity)
-					continue;
-				matrix[i][j] = (random.nextDouble() * (max - min) + min);
-			}
-		}
-
-		return matrix;
-	}
-
-	@SuppressWarnings("all")
-	private static HashMap<Integer,Long> writeResults( String dirname ) 
-		throws IOException, DMLRuntimeException 
-	{
-		HashMap<Integer,Long> map = new HashMap<Integer, Long>();
-		int count = 1;
-		int offset = (MODEL_INTERCEPT ? 1 : 0);
-		int cols = MODEL_MAX_ORDER + offset;
-		
-		for( Entry<Integer,HashMap<Integer,LinkedList<Double>>> inst : _results.entrySet() )
-		{
-			int instID = inst.getKey();
-			HashMap<Integer,LinkedList<Double>> instCF = inst.getValue();
-			
-			for( Entry<Integer,LinkedList<Double>> cfun : instCF.entrySet() )
-			{
-				int tDefID = cfun.getKey();
-				long ID = IDHandler.concatIntIDsToLong(instID, tDefID);
-				LinkedList<Double> dmeasure = cfun.getValue();
-				
-				PerfTestDef def = _regTestDef.get(tDefID);
-				LinkedList<Double> dvariable = generateSequence(def.getMin(), def.getMax(), NUM_SAMPLES_PER_TEST);
-				int dlen = dvariable.size();
-				int plen = def.getInternalVariables().length;
-				
-				//write variable data set
-				CSVWriter writer1 = new CSVWriter( new FileWriter( dirname+count+"_in1.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);						
-				if( plen == 1 ) //one dimensional function
-				{
-					//write 1, x, x^2, x^3, ...
-					String[] sbuff = new String[cols];
-					for( Double val : dvariable )
-		    		{
-		    			for( int j=0; j<cols; j++ )
-	    					sbuff[j] = String.valueOf( Math.pow(val, j+1-offset) );
-					    writer1.writeNext(sbuff);
-		    		}
-				}
-				else // multi-dimensional function
-				{
-					//write 1, x,y,z,x^2,y^2,z^2, xy, xz, yz, xyz
-					
-					String[] sbuff = new String[(int)Math.pow(2,plen)-1+plen+offset-1]; 
-					//String[] sbuff = new String[plen+offset];
-					if(offset==1)
-						sbuff[0]="1";
-					
-					//init index stack
-					int[] index = new int[plen];
-					for( int i=0; i<plen; i++ )
-						index[i] = 0;
-					
-					//execute test 
-					double[] buff = new double[plen];
-					while( index[0]<dlen )
-					{
-						//set buffer values
-						for( int i=0; i<plen; i++ )
-							buff[i] = dvariable.get(index[i]);
-						
-						//core writing
-						for( int i=1; i<=plen; i++ )
-						{
-							if( i==1 )
-							{
-								for( int j=0; j<plen; j++ )
-									sbuff[offset+j] = String.valueOf( buff[j] );
-								for( int j=0; j<plen; j++ )
-									sbuff[offset+plen+j] = String.valueOf( Math.pow(buff[j],2) );
-							}
-							else if( i==2 )
-							{
-								int ix=0;
-								for( int j=0; j<plen-1; j++ )
-									for( int k=j+1; k<plen; k++, ix++ )
-										sbuff[offset+2*plen+ix] = String.valueOf( buff[j]*buff[k] );
-							}
-							else if( i==plen )
-							{
-								//double tmp=1;
-								//for( int j=0; j<plen; j++ )
-								//	tmp *= buff[j];
-								//sbuff[offset+2*plen+plen*(plen-1)/2] = String.valueOf(tmp);
-							}
-							else
-								throw new DMLRuntimeException("More than 3 dims currently not supported.");
-								
-						}
-							
-						//for( int i=0; i<plen; i++ )	
-	    				//	sbuff[offset+i] = String.valueOf( buff[i] );
-						
-					    writer1.writeNext(sbuff);
-
-						//increment indexes
-						for( int i=plen-1; i>=0; i-- )
-						{
-							if(i==plen-1)
-								index[i]++;
-							else if( index[i+1] >= dlen )
-							{
-								index[i]++;
-								index[i+1]=0;
-							}
-						}
-					}
-				}				
-			    writer1.close();
-				
-			    
-				//write measure data set
-				CSVWriter writer2 = new CSVWriter( new FileWriter( dirname+count+"_in2.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);		
-				String[] buff2 = new String[1];
-				for( Double val : dmeasure )
-				{
-					buff2[0] = String.valueOf( val );
-					writer2.writeNext(buff2);
-				}
-				writer2.close();
-			
-				map.put(count, ID);
-				count++;
-			}
-		}
-		
-		return map;
-	}
-
-	private static void computeRegressionModels( String dmlname, String dmltmpname, String dir, int models, int rows, int cols ) 
-		throws IOException, ParseException, DMLException
-	{
-		//clean scratch space 
-		//AutomatedTestBase.cleanupScratchSpace();
-		
-		//read DML template
-		StringBuilder buffer = new StringBuilder();
-		BufferedReader br = new BufferedReader( new FileReader(new File( dmlname )) );
-	
-		try
-		{
-			String line = null;
-			while( (line=br.readLine()) != null )
-			{
-				buffer.append(line);
-				buffer.append("\n");
-			}
-		}
-		finally
-		{
-			if( br != null )
-				br.close();
-		}
-		
-		//replace parameters
-		String template = buffer.toString();
-		template = template.replaceAll("%numModels%", String.valueOf(models));
-		template = template.replaceAll("%numRows%", String.valueOf(rows));
-		template = template.replaceAll("%numCols%", String.valueOf(cols));
-		template = template.replaceAll("%indir%", String.valueOf(dir));
-		
-		// write temp DML file
-		File fout = new File(dmltmpname);
-		FileOutputStream fos = new FileOutputStream(fout);
-		try {
-			fos.write(template.getBytes());
-		}
-		finally
-		{
-			if( fos != null )
-				fos.close();
-		}
-		
-		// execute DML script
-		DMLScript.main(new String[] { "-f", dmltmpname });
-	}
-
-	private static void readRegressionModels( String dname, HashMap<Integer,Long> IDMapping ) 
-		throws IOException
-	{
-		for( Entry<Integer,Long> e : IDMapping.entrySet() )
-		{
-			int count = e.getKey();
-			long ID = e.getValue();
-			int instID = IDHandler.extractIntIDFromLong(ID, 1);
-			int tDefID = IDHandler.extractIntIDFromLong(ID, 2);
-			
-			//read file and parse
-			LinkedList<Double> params = new LinkedList<Double>();
-			CSVReader reader1 = new CSVReader( new FileReader(dname+count+"_out.csv"), ',' );
-			String[] nextline = null;
-			while( (nextline = reader1.readNext()) != null )
-			{
-				params.add(Double.parseDouble(nextline[0]));
-			}
-			reader1.close();
-			
-			double[] dparams = new double[params.size()];
-			int i=0;
-			for( Double d : params )
-			{
-				dparams[i] = d;
-				i++;
-			}
-			
-			//create new cost function
-			boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
-			CostFunction cf = new CostFunction(dparams, multidim); 
-			
-			//append to profile
-			if( !_profile.containsKey(instID) )
-				_profile.put(instID, new HashMap<Integer, CostFunction>());
-			_profile.get(instID).put(tDefID, cf);
-		}
-	}
-
-	private static String serializeTestVariables( InternalTestVariable[] vars )
-	{
-		StringBuilder sb = new StringBuilder();
-		for( int i=0; i<vars.length; i++ )
-		{
-			if( i>0 )
-				sb.append( XML_ELEMENT_DELIMITER );
-			sb.append( String.valueOf(vars[i]) );
-		}
-		return sb.toString();
-	}
-
-	private static InternalTestVariable[] parseTestVariables(String vars)
-	{
-		StringTokenizer st = new StringTokenizer(vars, XML_ELEMENT_DELIMITER);
-		InternalTestVariable[] v = new InternalTestVariable[st.countTokens()];
-		for( int i=0; i<v.length; i++ )
-			v[i] = InternalTestVariable.valueOf(st.nextToken());
-		return v;
-	}
-
-	private static String serializeParams( double[] vals )
-	{
-		StringBuilder sb = new StringBuilder();
-		for( int i=0; i<vals.length; i++ )
-		{
-			if( i>0 )
-				sb.append( XML_ELEMENT_DELIMITER );
-			sb.append( String.valueOf(vals[i]) );
-		}
-		return sb.toString();
-	}
-
-	private static double[] parseParams( String valStr )
-	{
-		StringTokenizer st = new StringTokenizer(valStr, XML_ELEMENT_DELIMITER);
-		double[] params = new double[st.countTokens()];
-		for( int i=0; i<params.length; i++ )
-			params[i] = Double.parseDouble(st.nextToken());
-		return params;
-	}
-
-	private static void readProfile( String fname ) 
-		throws XMLStreamException, IOException
-	{
-		//init profile map
-		_profile = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-		
-		//read existing profile
-		FileInputStream fis = new FileInputStream( fname );
-
-		try
-		{
-			//xml parsing
-			XMLInputFactory xif = XMLInputFactory.newInstance();
-			XMLStreamReader xsr = xif.createXMLStreamReader( fis );
-			
-			int e = xsr.nextTag(); // profile start
-			
-			while( true ) //read all instructions
-			{
-				e = xsr.nextTag(); // instruction start
-				if( e == XMLStreamConstants.END_ELEMENT )
-					break; //reached profile end tag
-				
-				//parse instruction
-				int ID = Integer.parseInt( xsr.getAttributeValue(null, XML_ID) );
-				//String name = xsr.getAttributeValue(null, XML_NAME).trim().replaceAll(" ", Lops.OPERAND_DELIMITOR);
-				HashMap<Integer, CostFunction> tmp = new HashMap<Integer, CostFunction>();
-				_profile.put( ID, tmp );
-				
-				while( true )
-				{
-					e = xsr.nextTag(); // cost function start
-					if( e == XMLStreamConstants.END_ELEMENT )
-						break; //reached instruction end tag
-					
-					//parse cost function
-					TestMeasure m = TestMeasure.valueOf( xsr.getAttributeValue(null, XML_MEASURE) );
-					TestVariable lv = TestVariable.valueOf( xsr.getAttributeValue(null, XML_VARIABLE) );
-					InternalTestVariable[] pv = parseTestVariables( xsr.getAttributeValue(null, XML_INTERNAL_VARIABLES) );
-					DataFormat df = DataFormat.valueOf( xsr.getAttributeValue(null, XML_DATAFORMAT) );
-					int tDefID = getTestDefID(m, lv, df, pv);
-					
-					xsr.next(); //read characters
-					double[] params = parseParams(xsr.getText());
-					boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
-					CostFunction cf = new CostFunction( params, multidim );
-					tmp.put(tDefID, cf);
-				
-					xsr.nextTag(); // cost function end
-					//System.out.println("added cost function");
-				}
-			}
-			xsr.close();
-		}
-		finally
-		{
-			IOUtilFunctions.closeSilently(fis);
-		}
-		
-		//mark profile as successfully read
-		_flagReadData = true;
-	}
-	
-	/**
-	 * StAX for efficient streaming XML writing.
-	 * 
-	 * @param dname directory name
-	 * @param fname file name
-	 * @throws IOException if IOException occurs
-	 * @throws XMLStreamException if XMLStreamException occurs
-	 */
-	private static void writeProfile( String dname, String fname ) 
-		throws IOException, XMLStreamException 
-	{
-		//create initial directory and file 
-		File dir =  new File( dname );
-		if( !dir.exists() )
-			dir.mkdir();
-		File f = new File( fname );
-		f.createNewFile();
-		
-		FileOutputStream fos = new FileOutputStream( f );
-		
-		try
-		{
-			//create document
-			XMLOutputFactory xof = XMLOutputFactory.newInstance();
-			XMLStreamWriter xsw = xof.createXMLStreamWriter( fos );
-			//TODO use an alternative way for intentation
-			//xsw = new IndentingXMLStreamWriter( xsw ); //remove this line if no indenting required
-			
-			//write document content
-			xsw.writeStartDocument();
-			xsw.writeStartElement( XML_PROFILE );
-			xsw.writeAttribute(XML_DATE, String.valueOf(new Date()) );
-			
-			//foreach instruction (boundle of cost functions)
-			for( Entry<Integer,HashMap<Integer,CostFunction>> inst : _profile.entrySet() )
-			{
-				int instID = inst.getKey();
-				String instName = _regInst_IDNames.get( instID );
-						
-				xsw.writeStartElement( XML_INSTRUCTION ); 
-				xsw.writeAttribute(XML_ID, String.valueOf( instID ));
-				xsw.writeAttribute(XML_NAME, instName.replaceAll(Lop.OPERAND_DELIMITOR, " "));
-				
-				//foreach testdef cost function
-				for( Entry<Integer,CostFunction> cfun : inst.getValue().entrySet() )
-				{
-					int tdefID = cfun.getKey();
-					PerfTestDef def = _regTestDef.get(tdefID);
-					CostFunction cf = cfun.getValue();
-					
-					xsw.writeStartElement( XML_COSTFUNCTION );
-					xsw.writeAttribute( XML_ID, String.valueOf( tdefID ));
-					xsw.writeAttribute( XML_MEASURE, def.getMeasure().toString() );
-					xsw.writeAttribute( XML_VARIABLE, def.getVariable().toString() );
-					xsw.writeAttribute( XML_INTERNAL_VARIABLES, serializeTestVariables(def.getInternalVariables()) );
-					xsw.writeAttribute( XML_DATAFORMAT, def.getDataformat().toString() );
-					xsw.writeCharacters(serializeParams( cf.getParams() ));
-					xsw.writeEndElement();// XML_COSTFUNCTION
-				}
-				
-				xsw.writeEndElement(); //XML_INSTRUCTION
-			}
-			
-			xsw.writeEndElement();//XML_PROFILE
-			xsw.writeEndDocument();
-			xsw.close();
-		}
-		finally
-		{
-			IOUtilFunctions.closeSilently(fos);
-		}
-	}
-
-	
-	
-	/**
-	 * Main for invoking the actual performance test in order to produce profile.xml
-	 * 
-	 * @param args string arguments to main() method
-	 */
-	public static void main(String[] args)
-	{
-		//execute the local / remote performance test
-		PerfTestTool.runTest(); 
-	}
-
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
deleted file mode 100644
index c216d52..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
+++ /dev/null
@@ -1,59 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#PerfTestTool: DML template for estimation cost functions.
-#Deprecated in SystemML 0.13
-
-dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem") 
-
-dynWrite = externalFunction(Matrix[Double] R, String fname) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem") 
-
-solve = externalFunction(Matrix[Double] A, Matrix[Double] y) 
-return (Matrix[Double] b) 
-implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem") 
-
-k = %numModels%;
-m = -1; 
-n = -1;
-
-dummy = matrix(1,rows=1,cols=1); 
-
-for( i in 1:k, par=8, mode=LOCAL )
-{
-   sin1 = "./conf/PerfTestTool/"+i+"_in1.csv";   
-   sin2 = "./conf/PerfTestTool/"+i+"_in2.csv";   
-   
-   D = dynRead( dummy, sin1, m, n );
-   y = dynRead( dummy, sin2, m, 1 );
-   
-   A = t(D) %*% D; # X'X
-   b = t(D) %*% y; # X'y
-   beta = solve(A,b); 
-
-   sout = "./conf/PerfTestTool/"+i+"_out.csv";   
-   
-   X=dynWrite( beta, sout );
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
index beb08bd..343d846 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
@@ -30,15 +30,13 @@ import org.apache.sysml.test.utils.TestUtils;
 
 public class ParForRulebasedOptimizerTest extends AutomatedTestBase 
 {
-	
-	private final static String TEST_NAME1 = "parfor_optimizer1";
-	private final static String TEST_NAME2 = "parfor_optimizer2";
-	private final static String TEST_NAME3 = "parfor_optimizer3";
+	private final static String TEST_NAME1 = "parfor_optimizer1"; //+b for dml 
+	private final static String TEST_NAME2 = "parfor_optimizer2"; //+b for dml
+	private final static String TEST_NAME3 = "parfor_optimizer3"; //+b for dml
 	private final static String TEST_DIR = "functions/parfor/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + ParForRulebasedOptimizerTest.class.getSimpleName() + "/";
 	private final static double eps = 1e-10;
-	
-	
+		
 	private final static int rows1 = 1000; //small CP
 	private final static int rows2 = 10000; //large MR
 	
@@ -67,82 +65,127 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 
 	
 	@Test
-	public void testParForOptimizerCorrelationSmallSmall() 
-	{
-		runParForOptimizerTest(1, false, false);
+	public void testParForRulebasedOptimizerCorrelationSmallSmall() {
+		runParForOptimizerTest(1, false, false, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerCorrelationSmallLarge() {
+		runParForOptimizerTest(1, false, true, false);
+	}
 	
 	@Test
-	public void testParForOptimizerCorrelationSmallLarge() 
-	{
-		runParForOptimizerTest(1, false, true);
+	public void testParForRulebasedOptimizerCorrelationLargeSmall() {
+		runParForOptimizerTest(1, true, false, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerCorrelationLargeLarge() {
+		runParForOptimizerTest(1, true, true, false);
+	}
 	
 	@Test
-	public void testParForOptimizerCorrelationLargeSmall() 
-	{
-		runParForOptimizerTest(1, true, false);
+	public void testParForRulebasedOptimizerBivariateStatsSmallSmall() {
+		runParForOptimizerTest(2, false, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerCorrelationLargeLarge() 
-	{
-		runParForOptimizerTest(1, true, true);
+	public void testParForRulebasedOptimizerBivariateStatsSmallLarge() {
+		runParForOptimizerTest(2, false, true, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerBivariateStatsLargeSmall() {
+		runParForOptimizerTest(2, true, false, false);
+	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsSmallSmall() 
-	{
-		runParForOptimizerTest(2, false, false);
+	public void testParForRulebasedOptimizerBivariateStatsLargeLarge() {
+		runParForOptimizerTest(2, true, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsSmallLarge() 
-	{
-		runParForOptimizerTest(2, false, true);
+	public void testParForRulebasedOptimizerFunctionInvocationSmallSmall() {
+		runParForOptimizerTest(3, false, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsLargeSmall() 
-	{
-		runParForOptimizerTest(2, true, false);
+	public void testParForRulebasedOptimizerFunctionInvocationSmallLarge() {
+		runParForOptimizerTest(3, false, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsLargeLarge() 
-	{
-		runParForOptimizerTest(2, true, true);
+	public void testParForRulebasedOptimizerFunctionInvocationLargeSmall() {
+		runParForOptimizerTest(3, true, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationSmallSmall() 
-	{
-		runParForOptimizerTest(3, false, false);
+	public void testParForRulebasedOptimizerFunctionInvocationLargeLarge() {
+		runParForOptimizerTest(3, true, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationSmallLarge() 
-	{
-		runParForOptimizerTest(3, false, true);
+	public void testParForHeuristicOptimizerCorrelationSmallSmall() {
+		runParForOptimizerTest(1, false, false, true);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationLargeSmall() 
-	{
-		runParForOptimizerTest(3, true, false);
+	public void testParForHeuristicOptimizerCorrelationSmallLarge() {
+		runParForOptimizerTest(1, false, true, true);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationLargeLarge() 
-	{
-		runParForOptimizerTest(3, true, true);
+	public void testParForHeuristicOptimizerCorrelationLargeSmall() {
+		runParForOptimizerTest(1, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerCorrelationLargeLarge() {
+		runParForOptimizerTest(1, true, true, true);
 	}
 	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsSmallSmall() {
+		runParForOptimizerTest(2, false, false, true);
+	}
 	
-	private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols )
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsSmallLarge() {
+		runParForOptimizerTest(2, false, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsLargeSmall() {
+		runParForOptimizerTest(2, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsLargeLarge() {
+		runParForOptimizerTest(2, true, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationSmallSmall() {
+		runParForOptimizerTest(3, false, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationSmallLarge() {
+		runParForOptimizerTest(3, false, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationLargeSmall() {
+		runParForOptimizerTest(3, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationLargeLarge() {
+		runParForOptimizerTest(3, true, true, true);
+	}
+	
+	
+	private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols, boolean timebasedOpt )
 	{
 		//find right rows and cols configuration
 		int rows=-1, cols=-1;  
@@ -171,31 +214,34 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		switch( scriptNum )
 		{
 			case 1: 
-				runUnaryTest(scriptNum, rows, cols);
+				runUnaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;
 			case 2:
-				runNaryTest(scriptNum, rows, cols);
+				runNaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;
 			case 3: 
-				runUnaryTest(scriptNum, rows, cols);
+				runUnaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;	
 		}
 	}
 	
-	private void runUnaryTest(int scriptNum, int rows, int cols )
+	private void runUnaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols )
 	{
 		TestConfiguration config = null;
 		String HOME = SCRIPT_DIR + TEST_DIR;
 		if( scriptNum==1 )
 		{
 			config=getTestConfiguration(TEST_NAME1);
-			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+			String testname = TEST_NAME1 + (timebasedOpt ? "b" : "");
+			fullDMLScriptName = HOME + testname + ".dml";
 		}
 		else if( scriptNum==3 )
 		{
 			config=getTestConfiguration(TEST_NAME3);
-			fullDMLScriptName = HOME + TEST_NAME3 + ".dml";
+			String testname = TEST_NAME3 + (timebasedOpt ? "b" : "");
+			fullDMLScriptName = HOME + testname + ".dml";
 		}
+		
 		config.addVariable("rows", rows);
 		config.addVariable("cols", cols);
 		loadTestConfiguration(config);
@@ -235,7 +281,7 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R");		
 	}
 	
-	private void runNaryTest(int scriptNum, int rows, int cols)
+	private void runNaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols)
 	{
 		TestConfiguration config = getTestConfiguration(TEST_NAME2);
 		config.addVariable("rows", rows);
@@ -244,7 +290,8 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		
 		/* This is for running the junit test the new way, i.e., construct the arguments directly */
 		String HOME = SCRIPT_DIR + TEST_DIR;
-		fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
+		String testname = TEST_NAME2 + (timebasedOpt ? "b" : "");
+		fullDMLScriptName = HOME + testname + ".dml";
 		programArgs = new String[]{"-args", 
 			input("D"),
 			input("S1"), input("S2"),

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
new file mode 100644
index 0000000..cd0a3f7
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
@@ -0,0 +1,53 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n-1), opt=HEURISTIC )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY); 
+      R[i,j] = dummy * rXY; 
+      
+   }
+}   
+
+write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
new file mode 100644
index 0000000..6b41058
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
@@ -0,0 +1,277 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Seven inputs (the dimension arguments $7-$9 are consumed by the read calls below):
+ *    $1) D  - input data ($7 rows, $8 columns)
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $10) numPairs - total number of pairs (m*n)
+ *    $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which the following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + category.counts - 
+ *        + category.means - 
+ *        + category.variances - 
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          The k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+dummy = matrix(1, rows=1, cols=1);
+
+
+parfor( i in 1:s1size, check=0, opt=HEURISTIC) {
+    a1 = as.scalar(S1[,i]);
+    k1 = as.scalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j; 
+        a2 = as.scalar(S2[,j]);
+        k2 = as.scalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = dummy*r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = dummy*sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal   
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = dummy*eta;
+                basestats[8,pairID] = dummy*f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = degFreedom;
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of the same size in which each cell denotes the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    dummy = matrix(1, rows=1, cols=1);
+    Rks = X;
+    size = nrow(X);
+    for(i in 1:size) {
+        prefixSum = 0.0;
+        if( i>1 ){
+           prefixSum = sum(X[1:(i-1),1]);
+        } 
+        Rks[i,1] = dummy * (prefixSum + ((as.scalar(X[i,1])+1)/2));
+    }
+    Ranks = Rks;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+
+    covXY = 0.0;
+    for(i in 1:catA) {
+        covXY = covXY + sum((F[i,]/(W-1)) * (as.scalar(C[i,1])-meanX) * (t(D[,1])-meanY));
+    }
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
new file mode 100644
index 0000000..6eae759
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
@@ -0,0 +1,52 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+V = read($1,rows=$2,cols=$3);
+n = $3;
+nd = $4;
+
+R = matrix(0, rows=1,cols=nd); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n/2), opt=HEURISTIC )
+{
+   X = V[ ,i];                 
+   Y = V[ ,n-i+1];                 
+   sx = execSum(X);
+   sy = execSum(Y);
+   R[1,i] = dummy*( sx+sy ); 
+}   
+
+write(R, $5);       
+
+
+execSum = function(Matrix[Double] X) return (Double sx) 
+{
+   if( ncol(X) > 0 )
+   {
+      sx = sum(X);    
+   }
+   else
+   {
+      sx = sum(X);
+   }
+}
\ No newline at end of file


[3/6] incubator-systemml git commit: [SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

Posted by mb...@apache.org.
[SYSTEMML-1326] Cleanup hop rewrites (removed redundancy, minor fixes)

This patch removes redundancy from existing hop rewrites by
consolidating common primitives into HopRewriteUtils in order to avoid
subtle bugs such as missing size propagation, missing line numbers, and
missing blocking configurations.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1fe1a02d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1fe1a02d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1fe1a02d

Branch: refs/heads/master
Commit: 1fe1a02d210356207d75cc3ffc2f246cd4a8d11b
Parents: 4316efe
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Feb 22 18:23:12 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 24 12:27:26 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/AggBinaryOp.java |  68 +--
 .../sysml/hops/ParameterizedBuiltinOp.java      |  45 +-
 .../java/org/apache/sysml/hops/ReorgOp.java     |   4 +-
 .../sysml/hops/globalopt/gdfgraph/GDFNode.java  |   5 +-
 .../sysml/hops/rewrite/HopRewriteUtils.java     | 179 +++---
 .../RewriteAlgebraicSimplificationDynamic.java  | 596 ++++++-------------
 .../RewriteAlgebraicSimplificationStatic.java   | 416 ++++---------
 .../hops/rewrite/RewriteConstantFolding.java    |  13 +-
 .../rewrite/RewriteForLoopVectorization.java    |  45 +-
 .../rewrite/RewriteIndexingVectorization.java   |  18 +-
 .../RewriteMatrixMultChainOptimization.java     |   4 +-
 .../rewrite/RewriteRemoveReadAfterWrite.java    |   7 +-
 .../rewrite/RewriteRemoveUnnecessaryCasts.java  |   7 +-
 .../RewriteSplitDagDataDependentOperators.java  |  19 +-
 .../rewrite/RewriteSplitDagUnknownCSVRead.java  |   4 +-
 .../org/apache/sysml/parser/DMLTranslator.java  |   4 +-
 .../parfor/opt/OptimizerRuleBased.java          |   5 +-
 17 files changed, 489 insertions(+), 950 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 73dd8a4..dd9182d 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -191,7 +191,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			else if( et == ExecType.SPARK ) 
 			{
 				//matrix mult operation selection part 3 (SPARK type)
-				boolean tmmRewrite = input1 instanceof ReorgOp && ((ReorgOp)input1).getOp()==ReOrgOp.TRANSPOSE;
+				boolean tmmRewrite = HopRewriteUtils.isTransposeOperation(input1);
 				_method = optFindMMultMethodSpark ( 
 						input1.getDim1(), input1.getDim2(), input1.getRowsInBlock(), input1.getColsInBlock(), input1.getNnz(),   
 						input2.getDim1(), input2.getDim2(), input2.getRowsInBlock(), input2.getColsInBlock(), input2.getNnz(),
@@ -459,7 +459,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 	{
 		int index = left ? 0 : 1;
 		return !(getInput().get(index) instanceof DataOp && ((DataOp)getInput().get(index)).requiresCheckpoint())  
-			&& !(getInput().get(index) instanceof ReorgOp && ((ReorgOp)getInput().get(index)).getOp()==ReOrgOp.TRANSPOSE)
+			&& !HopRewriteUtils.isTransposeOperation(getInput().get(index))
 			&& getInput().get(index).getParent().size()==1 //bagg is only parent	
 			&& !getInput().get(index).areDimsBelowThreshold() 
 			&& getInput().get(index).optFindExecType() == ExecType.SPARK
@@ -479,15 +479,13 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop in1 = getInput().get(0);
 		Hop in2 = getInput().get(1);
 		
-		if(    in1 instanceof ReorgOp 
-			&& ((ReorgOp)in1).getOp() == ReOrgOp.TRANSPOSE 
+		if( HopRewriteUtils.isTransposeOperation(in1)
 			&& in1.getInput().get(0) == in2 )
 		{
 			ret = MMTSJType.LEFT;
 		}
 		
-		if(    in2 instanceof ReorgOp 
-			&& ((ReorgOp)in2).getOp() == ReOrgOp.TRANSPOSE 
+		if( HopRewriteUtils.isTransposeOperation(in2) 
 			&& in2.getInput().get(0) == in1 )
 		{
 			ret = MMTSJType.RIGHT;
@@ -510,7 +508,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop in2 = getInput().get(1);
 		
 		//check for transpose left input (both chain types)
-		if( in1 instanceof ReorgOp && ((ReorgOp)in1).getOp() == ReOrgOp.TRANSPOSE )
+		if( HopRewriteUtils.isTransposeOperation(in1) )
 		{
 			Hop X = in1.getInput().get(0);
 				
@@ -615,7 +613,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop rightInput = getInput().get(1);
 		
 		Hop nrow = HopRewriteUtils.createValueHop(pmInput, true); //NROW
-		HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+		nrow.setOutputBlocksizes(0, 0);
 		nrow.setForcedExecType(ExecType.CP);
 		HopRewriteUtils.copyLineNumbers(this, nrow);
 		Lop lnrow = nrow.constructLops();
@@ -644,7 +642,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			Hop h2 = getInput().get(1);
 			Lop left; Lop right;
 			boolean isLeftTransposed; boolean isRightTransposed;
-			if( h1 instanceof ReorgOp && ((ReorgOp)h1).getOp()==ReOrgOp.TRANSPOSE ) {
+			if( HopRewriteUtils.isTransposeOperation(h1) ) {
 				isLeftTransposed = true;
 				left = h1.getInput().get(0).constructLops();
 			}
@@ -652,7 +650,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 				isLeftTransposed = false;
 				left = h1.constructLops();
 			}
-			if( h2 instanceof ReorgOp && ((ReorgOp)h2).getOp()==ReOrgOp.TRANSPOSE ) {
+			if( HopRewriteUtils.isTransposeOperation(h2) ) {
 				isRightTransposed = true;
 				right = h2.getInput().get(0).constructLops();
 			}
@@ -872,8 +870,6 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		
 		Hop pmInput = getInput().get(0);
 		Hop rightInput = getInput().get(1);
-		long brlen = pmInput.getRowsInBlock();
-		long bclen = pmInput.getColsInBlock();
 		
 		Lop lpmInput = pmInput.constructLops();
 		Hop nrow = null;
@@ -887,30 +883,19 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			//v = rowMaxIndex(t(pm)) * rowMax(t(pm)) 
 			ReorgOp transpose = HopRewriteUtils.createTranspose(pmInput);
 			transpose.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, transpose);	
 			
-			AggUnaryOp agg1 = new AggUnaryOp("tmp2a", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAXINDEX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg1, brlen, bclen);
-			agg1.refreshSizeInformation();
+			AggUnaryOp agg1 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAXINDEX, Direction.Row);
 			agg1.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, agg1);
 			
-			AggUnaryOp agg2 = new AggUnaryOp("tmp2b", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg2, brlen, bclen);
-			agg2.refreshSizeInformation();
+			AggUnaryOp agg2 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAX, Direction.Row);
 			agg2.setForcedExecType(ExecType.SPARK);
-			HopRewriteUtils.copyLineNumbers(this, agg2);
 			
-			BinaryOp mult = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, agg1, agg2);
-			HopRewriteUtils.setOutputBlocksizes(mult, brlen, bclen); 
-			mult.refreshSizeInformation();
+			BinaryOp mult = HopRewriteUtils.createBinary(agg1, agg2, OpOp2.MULT);
 			mult.setForcedExecType(ExecType.SPARK);
-			//mult.computeMemEstimate(memo); //select exec type
-			HopRewriteUtils.copyLineNumbers(this, mult);
 			
 			//compute NROW target via nrow(m)
 			nrow = HopRewriteUtils.createValueHop(pmInput, true);
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(ExecType.CP);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 			
@@ -921,7 +906,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		{
 			//compute NROW target via max(v)
 			nrow = HopRewriteUtils.createAggUnaryOp(pmInput, AggOp.MAX, Direction.RowCol); 
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(etVect);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 		}
@@ -1239,8 +1224,6 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		
 		Hop pmInput = getInput().get(0);
 		Hop rightInput = getInput().get(1);
-		long brlen = pmInput.getRowsInBlock();
-		long bclen = pmInput.getColsInBlock();
 		
 		Lop lpmInput = pmInput.constructLops();
 		Hop nrow = null;
@@ -1254,29 +1237,19 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 			//v = rowMaxIndex(t(pm)) * rowMax(t(pm)) 
 			ReorgOp transpose = HopRewriteUtils.createTranspose(pmInput);
 			transpose.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, transpose);	
 			
-			AggUnaryOp agg1 = new AggUnaryOp("tmp2a", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAXINDEX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg1, brlen, bclen);
-			agg1.refreshSizeInformation();
+			AggUnaryOp agg1 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAXINDEX, Direction.Row);
 			agg1.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, agg1);
 			
-			AggUnaryOp agg2 = new AggUnaryOp("tmp2b", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, Direction.Row, transpose);
-			HopRewriteUtils.setOutputBlocksizes(agg2, brlen, bclen);
-			agg2.refreshSizeInformation();
+			AggUnaryOp agg2 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAX, Direction.Row);
 			agg2.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, agg2);
 			
-			BinaryOp mult = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, agg1, agg2);
-			HopRewriteUtils.setOutputBlocksizes(mult, brlen, bclen); 
-			mult.refreshSizeInformation();
+			BinaryOp mult = HopRewriteUtils.createBinary(agg1, agg2, OpOp2.MULT);
 			mult.setForcedExecType(ExecType.MR);
-			HopRewriteUtils.copyLineNumbers(this, mult);
 			
 			//compute NROW target via nrow(m)
 			nrow = HopRewriteUtils.createValueHop(pmInput, true);
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(ExecType.CP);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 				
@@ -1287,7 +1260,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		{
 			//compute NROW target via max(v)
 			nrow = HopRewriteUtils.createAggUnaryOp(pmInput, AggOp.MAX, Direction.RowCol); 
-			HopRewriteUtils.setOutputBlocksizes(nrow, 0, 0);
+			nrow.setOutputBlocksizes(0, 0);
 			nrow.setForcedExecType(etVect);
 			HopRewriteUtils.copyLineNumbers(this, nrow);
 		}
@@ -1345,7 +1318,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		//even a ba in CP does not imply that both transposes can be executed in CP)
 		if( CP ) //in-memory ba 
 		{
-			if( h1 instanceof ReorgOp && ((ReorgOp)h1).getOp()==ReOrgOp.TRANSPOSE )
+			if( HopRewriteUtils.isTransposeOperation(h1) )
 			{
 				long m = h1.getDim1();
 				long cd = h1.getDim2();
@@ -1861,8 +1834,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 		Hop input1 = getInput().get(0);
 		Hop input2 = getInput().get(1);
 		
-		if( isMatrixMultiply() )
-		{
+		if( isMatrixMultiply() ) {
 			setDim1(input1.getDim1());
 			setDim2(input2.getDim2());
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index e2ec190..72e9115 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -512,11 +512,11 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				//step1: compute index vectors
 				Hop ppred0 = input;
 				if( !isPPredInput ) { //ppred only if required
-					ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
+					ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 					HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
 				}
 				
-				UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0); 
+				UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM); 
 				HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
 			
 				Lop loutput = null;
@@ -524,14 +524,14 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
 				if( _outputPermutationMatrix && mest < mbudget ) //SPECIAL CASE: SELECTION VECTOR
 				{
-					BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
+					BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
 					HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
 					loutput = sel.constructLops();
 				}
 				else //GENERAL CASE: GENERAL PERMUTATION MATRIX
 				{
 					//max ensures non-zero entries and at least one output row
-					BinaryOp max = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MAX, cumsum, new LiteralOp(1));
+					BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
 					HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
 					
 					DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
@@ -541,7 +541,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					//step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
 					//note: weights always the input (even if isPPredInput) because input also includes 0s
 					TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
-					HopRewriteUtils.setOutputBlocksizes(table, brlen, bclen);
+					table.setOutputBlocksizes(brlen, bclen);
 					table.refreshSizeInformation();
 					table.setForcedExecType(ExecType.MR); //force MR 
 					HopRewriteUtils.copyLineNumbers(this, table);
@@ -581,23 +581,18 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				
 				if(selectHop == null) {
 					//Step1: compute row/col non-empty indicators 
-					ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
-					HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
-					ppred0.refreshSizeInformation();
+					ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 					ppred0.setForcedExecType(ExecType.MR); //always MR 
-					HopRewriteUtils.copyLineNumbers(this, ppred0);
 					
 					emptyInd = ppred0;
 					if( !((rmRows && clen == 1) || (!rmRows && rlen==1)) ){
-						emptyInd = new AggUnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, rmRows?Direction.Row:Direction.Col, ppred0);
-						HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
-						emptyInd.refreshSizeInformation();
+						emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows?Direction.Row:Direction.Col);
 						emptyInd.setForcedExecType(ExecType.MR); //always MR
 						HopRewriteUtils.copyLineNumbers(this, emptyInd);
 					}
 				} else {
 					emptyInd = selectHop;
-					HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
+					emptyInd.setOutputBlocksizes(brlen, bclen);
 					emptyInd.refreshSizeInformation();
 					emptyInd.setForcedExecType(ExecType.MR); //always MR
 					HopRewriteUtils.copyLineNumbers(this, emptyInd);
@@ -610,7 +605,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);	
 				}
 			
-				UnaryOp cumsum = new UnaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, cumsumInput); 
+				UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM); 
 				HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
 			
 				Hop cumsumOutput = cumsum;
@@ -619,10 +614,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);	
 				}
 				
-				Hop maxDim = new AggUnaryOp("tmp4", DataType.SCALAR, ValueType.DOUBLE, AggOp.MAX, Direction.RowCol, cumsumOutput); //alternative: right indexing
+				Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol); //alternative: right indexing
 				HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
 				
-				BinaryOp offsets = new BinaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cumsumOutput, emptyInd);
+				BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
 				HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
 				
 				//Step 3: gather non-empty rows/cols into final results 
@@ -713,23 +708,17 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			
 			if(selectHop == null) {
 				//Step1: compute row/col non-empty indicators 
-				ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp(0));
-				HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
-				ppred0.refreshSizeInformation();
+				ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
 				ppred0.setForcedExecType(ExecType.SPARK); //always Spark
-				HopRewriteUtils.copyLineNumbers(this, ppred0);
 				
 				emptyInd = ppred0;
 				if( !((rmRows && clen == 1) || (!rmRows && rlen==1)) ){
-					emptyInd = new AggUnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, AggOp.MAX, rmRows?Direction.Row:Direction.Col, ppred0);
-					HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
-					emptyInd.refreshSizeInformation();
+					emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows?Direction.Row:Direction.Col);
 					emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
-					HopRewriteUtils.copyLineNumbers(this, emptyInd);
 				}
 			} else {
 				emptyInd = selectHop;
-				HopRewriteUtils.setOutputBlocksizes(emptyInd, brlen, bclen);
+				emptyInd.setOutputBlocksizes(brlen, bclen);
 				emptyInd.refreshSizeInformation();
 				emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
 				HopRewriteUtils.copyLineNumbers(this, emptyInd);
@@ -742,7 +731,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
 			}
 		
-			UnaryOp cumsum = new UnaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, cumsumInput); 
+			UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM); 
 			HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
 		
 			Hop cumsumOutput = cumsum;
@@ -751,10 +740,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);	
 			}
 			
-			Hop maxDim = new AggUnaryOp("tmp4", DataType.SCALAR, ValueType.DOUBLE, AggOp.MAX, Direction.RowCol, cumsumOutput); //alternative: right indexing
+			Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol); //alternative: right indexing
 			HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
 			
-			BinaryOp offsets = new BinaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, cumsumOutput, emptyInd);
+			BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
 			HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
 			
 			//Step 3: gather non-empty rows/cols into final results 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index abe03a8..8d0b4b4 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -256,7 +256,7 @@ public class ReorgOp extends Hop implements MultiThreadedHop
 						vinput = new IndexingOp("tmp1", getDataType(), getValueType(), input, new LiteralOp(1L), 
 								HopRewriteUtils.createValueHop(input, true), by, by, false, true);
 						vinput.refreshSizeInformation();
-						HopRewriteUtils.setOutputBlocksizes(vinput, getRowsInBlock(), getColsInBlock());
+						vinput.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
 						HopRewriteUtils.copyLineNumbers(this, vinput);	
 					}
 					
@@ -314,7 +314,7 @@ public class ReorgOp extends Hop implements MultiThreadedHop
 						
 						//generate table
 						TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, seq, voutput, new LiteralOp(1L) );
-						HopRewriteUtils.setOutputBlocksizes(table, getRowsInBlock(), getColsInBlock());
+						table.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
 						table.refreshSizeInformation();
 						table.setForcedExecType(ExecType.MR); //force MR 
 						HopRewriteUtils.copyLineNumbers(this, table);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
index e87a911..e385a86 100644
--- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
+++ b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java
@@ -28,9 +28,8 @@ import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.FileFormatTypes;
 import org.apache.sysml.hops.Hop.OpOp1;
-import org.apache.sysml.hops.Hop.ReOrgOp;
-import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.UnaryOp;
+import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.ProgramBlock;
@@ -149,7 +148,7 @@ public class GDFNode
 	{
 		return (   _hop instanceof UnaryOp && format!=FileFormatTypes.CSV
 				|| (_hop instanceof AggUnaryOp && ((AggUnaryOp)_hop).getDirection()==Direction.RowCol && format!=FileFormatTypes.CSV)
-				|| (_hop instanceof ReorgOp && ((ReorgOp)_hop).getOp()==ReOrgOp.TRANSPOSE && format!=FileFormatTypes.CSV)
+				|| (HopRewriteUtils.isTransposeOperation(_hop) && format!=FileFormatTypes.CSV)
 				|| format==FileFormatTypes.BINARY ); //any op
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1fe1a02d/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index d3be09d..7f65ddd 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
 
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
@@ -222,16 +223,12 @@ public class HopRewriteUtils
 		return childs.indexOf(child);
 	}
 	
-	public static void removeChildReference( Hop parent, Hop child )
-	{
-		//remove child reference
+	public static void removeChildReference( Hop parent, Hop child ) {
 		parent.getInput().remove( child );
 		child.getParent().remove( parent );
 	}
 	
-	public static void removeChildReferenceByPos( Hop parent, Hop child, int posChild )
-	{
-		//remove child reference
+	public static void removeChildReferenceByPos( Hop parent, Hop child, int posChild ) {
 		parent.getInput().remove( posChild );
 		child.getParent().remove( parent );
 	}
@@ -246,18 +243,35 @@ public class HopRewriteUtils
 		parent.getInput().clear();
 	}
 	
-	public static void addChildReference( Hop parent, Hop child )
-	{
+	public static void addChildReference( Hop parent, Hop child ) {
 		parent.getInput().add( child );
 		child.getParent().add( parent );
 	}
 	
-	public static void addChildReference( Hop parent, Hop child, int pos )
-	{
+	public static void addChildReference( Hop parent, Hop child, int pos ){
 		parent.getInput().add( pos, child );
 		child.getParent().add( parent );
 	}
 	
+	public static void replaceChildReference( Hop parent, Hop inOld, Hop inNew ) {
+		int pos = getChildReferencePos(parent, inOld);
+		removeChildReferenceByPos(parent, inOld, pos);
+		addChildReference(parent, inNew, pos);
+		parent.refreshSizeInformation();
+	}
+	
+	public static void replaceChildReference( Hop parent, Hop inOld, Hop inNew, int pos ) {
+		removeChildReferenceByPos(parent, inOld, pos);
+		addChildReference(parent, inNew, pos);
+		parent.refreshSizeInformation();
+	}
+	
+	public static void cleanupUnreferenced( Hop... inputs ) {
+		for( Hop input : inputs )
+			if( input.getParent().isEmpty() )
+				removeAllChildReferences(input);
+	}
+	
 	public static Hop createDataGenOp( Hop input, double value ) 
 		throws HopsException
 	{		
@@ -279,8 +293,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(input.getRowsInBlock());
-		datagen.setColsInBlock(input.getColsInBlock());
+		datagen.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -335,12 +349,11 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		DataGenOp datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params2);
-		datagen.setRowsInBlock(inputGen.getRowsInBlock());
-		datagen.setColsInBlock(inputGen.getColsInBlock());
+		datagen.setOutputBlocksizes(inputGen.getRowsInBlock(), inputGen.getColsInBlock());
+		copyLineNumbers(inputGen, datagen);
 		
-		if( smin==0 && smax==0 ) {
+		if( smin==0 && smax==0 )
 			datagen.setNnz(0);
-		}
 			
 		return datagen;
 	}
@@ -366,8 +379,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -399,8 +412,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -425,8 +438,8 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(rowInput.getRowsInBlock());
-		datagen.setColsInBlock(colInput.getColsInBlock());
+		datagen.setOutputBlocksizes(rowInput.getRowsInBlock(), colInput.getColsInBlock());
+		copyLineNumbers(rowInput, datagen);
 		
 		if( value==0 )
 			datagen.setNnz(0);
@@ -441,8 +454,8 @@ public class HopRewriteUtils
 	public static ReorgOp createReorg(Hop input, ReOrgOp rop)
 	{
 		ReorgOp transpose = new ReorgOp(input.getName(), input.getDataType(), input.getValueType(), rop, input);
-		HopRewriteUtils.setOutputBlocksizes(transpose, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, transpose);
+		transpose.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, transpose);
 		transpose.refreshSizeInformation();	
 		
 		return transpose;
@@ -451,31 +464,35 @@ public class HopRewriteUtils
 	public static UnaryOp createUnary(Hop input, OpOp1 type) 
 		throws HopsException
 	{
-		DataType dt = (type==OpOp1.CAST_AS_SCALAR) ? DataType.SCALAR : input.getDataType();
-		UnaryOp unary = new UnaryOp(input.getName(), dt, input.getValueType(), type, input);
-		HopRewriteUtils.setOutputBlocksizes(unary, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, unary);
+		DataType dt = (type==OpOp1.CAST_AS_SCALAR) ? DataType.SCALAR : 
+			(type==OpOp1.CAST_AS_MATRIX) ? DataType.MATRIX : input.getDataType();
+		ValueType vt = (type==OpOp1.CAST_AS_MATRIX) ? ValueType.DOUBLE : input.getValueType();
+		UnaryOp unary = new UnaryOp(input.getName(), dt, vt, type, input);
+		unary.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		if( type == OpOp1.CAST_AS_SCALAR || type == OpOp1.CAST_AS_MATRIX ) {
+			int dim = (type==OpOp1.CAST_AS_SCALAR) ? 0 : 1;
+			int blksz = (type==OpOp1.CAST_AS_SCALAR) ? 0 : ConfigurationManager.getBlocksize();
+			setOutputParameters(unary, dim, dim, blksz, blksz, -1);		
+		}
+		
+		copyLineNumbers(input, unary);
 		unary.refreshSizeInformation();	
 		
 		return unary;
 	}
 	
-	public static BinaryOp createMinus(Hop input)
-	{
-		BinaryOp minus = new BinaryOp(input.getName(), input.getDataType(), input.getValueType(), OpOp2.MINUS, new LiteralOp(0), input);
-		HopRewriteUtils.setOutputBlocksizes(minus, input.getRowsInBlock(), input.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input, minus);
-		minus.refreshSizeInformation();	
-		
-		return minus;
+	public static BinaryOp createBinaryMinus(Hop input) {
+		return createBinary(new LiteralOp(0), input, OpOp2.MINUS);
 	}
 	
 	public static BinaryOp createBinary(Hop input1, Hop input2, OpOp2 op)
 	{
-		BinaryOp bop = new BinaryOp(input1.getName(), input1.getDataType(), 
-				input1.getValueType(), op, input1, input2);
-		HopRewriteUtils.setOutputBlocksizes(bop, input1.getRowsInBlock(), input1.getColsInBlock());
-		HopRewriteUtils.copyLineNumbers(input1, bop);
+		Hop mainInput = input1.getDataType().isMatrix() ? input1 : 
+			input2.getDataType().isMatrix() ? input2 : input1;
+		BinaryOp bop = new BinaryOp(mainInput.getName(), mainInput.getDataType(), 
+			mainInput.getValueType(), op, input1, input2);
+		bop.setOutputBlocksizes(mainInput.getRowsInBlock(), mainInput.getColsInBlock());
+		copyLineNumbers(mainInput, bop);
 		bop.refreshSizeInformation();	
 		
 		return bop;
@@ -485,23 +502,20 @@ public class HopRewriteUtils
 		return createAggUnaryOp(input, AggOp.SUM, Direction.RowCol);
 	}
 	
-	public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir )
-	{
+	public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir ) {
 		DataType dt = (dir==Direction.RowCol) ? DataType.SCALAR : input.getDataType();
-		
 		AggUnaryOp auop = new AggUnaryOp(input.getName(), dt, input.getValueType(), op, dir, input);
-		auop.setRowsInBlock(input.getRowsInBlock());
-		auop.setColsInBlock(input.getColsInBlock());
+		auop.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, auop);
 		auop.refreshSizeInformation();
 		
 		return auop;
 	}
 	
-	public static AggBinaryOp createMatrixMultiply(Hop left, Hop right)
-	{
+	public static AggBinaryOp createMatrixMultiply(Hop left, Hop right) {
 		AggBinaryOp mmult = new AggBinaryOp(left.getName(), left.getDataType(), left.getValueType(), OpOp2.MULT, AggOp.SUM, left, right);
-		mmult.setRowsInBlock(left.getRowsInBlock());
-		mmult.setColsInBlock(right.getColsInBlock());
+		mmult.setOutputBlocksizes(left.getRowsInBlock(), right.getColsInBlock());
+		copyLineNumbers(left, mmult);
 		mmult.refreshSizeInformation();
 		
 		return mmult;
@@ -550,55 +564,42 @@ public class HopRewriteUtils
 		
 		//note internal refresh size information
 		DataGenOp datagen = new DataGenOp(DataGenMethod.SEQ, new DataIdentifier("tmp"), params);
-		datagen.setRowsInBlock(input.getRowsInBlock());
-		datagen.setColsInBlock(input.getColsInBlock());
+		datagen.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
+		copyLineNumbers(input, datagen);
 		
 		return datagen;
 	}
 	
 	public static TernaryOp createTernaryOp(Hop mleft, Hop smid, Hop mright, OpOp3 op) {
 		TernaryOp ternOp = new TernaryOp("tmp", DataType.MATRIX, ValueType.DOUBLE, op, mleft, smid, mright);
-		ternOp.setRowsInBlock(mleft.getRowsInBlock());
-		ternOp.setColsInBlock(mleft.getColsInBlock());
+		ternOp.setOutputBlocksizes(mleft.getRowsInBlock(), mleft.getColsInBlock());
+		copyLineNumbers(mleft, ternOp);
 		ternOp.refreshSizeInformation();
 		return ternOp;
 	}
 	
-	public static void setOutputBlocksizes( Hop hop, long brlen, long bclen )
-	{
-		hop.setRowsInBlock( brlen );
-		hop.setColsInBlock( bclen );
-	}
-	
-	public static void setOutputParameters( Hop hop, long rlen, long clen, long brlen, long bclen, long nnz )
-	{
+	public static void setOutputParameters( Hop hop, long rlen, long clen, long brlen, long bclen, long nnz ) {
 		hop.setDim1( rlen );
 		hop.setDim2( clen );
-		hop.setRowsInBlock( brlen );
-		hop.setColsInBlock( bclen );
+		hop.setOutputBlocksizes(brlen, bclen );
 		hop.setNnz( nnz );
 	}
 	
-	public static void setOutputParametersForScalar( Hop hop )
-	{
+	public static void setOutputParametersForScalar( Hop hop ) {
 		hop.setDim1( 0 );
 		hop.setDim2( 0 );
-		hop.setRowsInBlock( -1 );
-		hop.setColsInBlock( -1 );
+		hop.setOutputBlocksizes(-1, -1 );
 		hop.setNnz( -1 );
 	}
 	
-	public static void refreshOutputParameters( Hop hnew, Hop hold )
-	{
+	public static void refreshOutputParameters( Hop hnew, Hop hold ) {
 		hnew.setDim1( hold.getDim1() );
 		hnew.setDim2( hold.getDim2() );
-		hnew.setRowsInBlock(hold.getRowsInBlock());
-		hnew.setColsInBlock(hold.getColsInBlock());
+		hnew.setOutputBlocksizes(hold.getRowsInBlock(), hold.getColsInBlock());
 		hnew.refreshSizeInformation();
 	}
 	
-	public static void copyLineNumbers( Hop src, Hop dest )
-	{
+	public static void copyLineNumbers( Hop src, Hop dest ) {
 		dest.setAllPositions(src.getBeginLine(), src.getBeginColumn(), src.getEndLine(), src.getEndColumn());
 	}
 	
@@ -610,7 +611,7 @@ public class HopRewriteUtils
 	public static void updateHopCharacteristics( Hop hop, long brlen, long bclen, MemoTable memo, Hop src )
 	{
 		//update block sizes and dimensions  
-		setOutputBlocksizes(hop, brlen, bclen);
+		hop.setOutputBlocksizes(brlen, bclen);
 		hop.refreshSizeInformation();
 		
 		//compute memory estimates (for exec type selection)
@@ -668,7 +669,7 @@ public class HopRewriteUtils
 	}
 	
 	public static boolean isOuterProductLikeMM( Hop hop ) {
-		return hop instanceof AggBinaryOp
+		return isMatrixMultiply(hop)
 			&& hop.getInput().get(0).getDim1() > hop.getInput().get(0).getDim2()
 			&& hop.getInput().get(1).getDim1() < hop.getInput().get(1).getDim2();
 	}
@@ -715,6 +716,10 @@ public class HopRewriteUtils
 		return (hop instanceof ReorgOp && ((ReorgOp)hop).getOp()==ReOrgOp.TRANSPOSE);
 	}
 	
+	public static boolean isTransposeOperation(Hop hop, int maxParents) {
+		return isTransposeOperation(hop) && hop.getParent().size() <= maxParents;
+	}
+	
 	public static boolean containsTransposeOperation(ArrayList<Hop> hops) {
 		boolean ret = false;
 		for( Hop hop : hops )
@@ -723,8 +728,16 @@ public class HopRewriteUtils
 	}
 	
 	public static boolean isTransposeOfItself(Hop hop1, Hop hop2) {
-		return hop1 instanceof ReorgOp && ((ReorgOp)hop1).getOp()==ReOrgOp.TRANSPOSE && hop1.getInput().get(0) == hop2
-			|| hop2 instanceof ReorgOp && ((ReorgOp)hop2).getOp()==ReOrgOp.TRANSPOSE && hop2.getInput().get(0) == hop1;	
+		return isTransposeOperation(hop1) && hop1.getInput().get(0) == hop2
+			|| isTransposeOperation(hop2) && hop2.getInput().get(0) == hop1;	
+	}
+	
+	public static boolean isBinary(Hop hop, OpOp2 type) {
+		return hop instanceof BinaryOp && ((BinaryOp)hop).getOp()==type;
+	}
+	
+	public static boolean isBinary(Hop hop, OpOp2 type, int maxParents) {
+		return isBinary(hop, type) && hop.getParent().size() <= maxParents;
 	}
 	
 	public static boolean isBinaryMatrixScalarOperation(Hop hop) {
@@ -733,6 +746,18 @@ public class HopRewriteUtils
 			||(hop.getInput().get(1).getDataType().isMatrix() && hop.getInput().get(0).getDataType().isScalar()));
 	}
 	
+	public static boolean isUnary(Hop hop, OpOp1 type) {
+		return hop instanceof UnaryOp && ((UnaryOp)hop).getOp()==type;
+	}
+	
+	public static boolean isUnary(Hop hop, OpOp1 type, int maxParents) {
+		return isUnary(hop, type) && hop.getParent().size() <= maxParents;
+	}
+	
+	public static boolean isMatrixMultiply(Hop hop) {
+		return hop instanceof AggBinaryOp && ((AggBinaryOp)hop).isMatrixMultiply();
+	}
+	
 	public static boolean isNonZeroIndicator(Hop pred, Hop hop )
 	{
 		if( pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL