You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/03 02:33:14 UTC

systemml git commit: [SYSTEMML-1884] Fix unnecessary spark/mr insts on removeEmpty w/ select

Repository: systemml
Updated Branches:
  refs/heads/master ba73291c9 -> c145dbe3f


[SYSTEMML-1884] Fix unnecessary spark/mr insts on removeEmpty w/ select

This patch fixes the compilation of removeEmpty with select vectors,
which can cause unnecessary spark/mr instructions if the respective
removeEmpty is initially compiled to spark/mr and the select vector is
computed in the same HOP DAG. In detail, the issue was due to setting
the forced execution type for the select vector even though it is
externally provided and not computed within the operation itself.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c145dbe3
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c145dbe3
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c145dbe3

Branch: refs/heads/master
Commit: c145dbe3fd6706b4ecab1323d381f99571aa92ed
Parents: ba73291
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Sep 2 19:00:36 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Sep 2 19:34:41 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/ParameterizedBuiltinOp.java      | 188 ++++++++-----------
 .../recompile/RemoveEmptyPotpourriTest.java     |  24 ++-
 .../recompile/remove_empty_potpourri5.R         |  31 +++
 .../recompile/remove_empty_potpourri5.dml       |  29 +++
 4 files changed, 158 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/c145dbe3/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index a611893..80e8764 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -92,17 +92,15 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 	public ParameterizedBuiltinOp(String l, DataType dt, ValueType vt,
 			ParamBuiltinOp op, HashMap<String, Hop> inputParameters) {
 		super(l, dt, vt);
-
+		
 		_op = op;
-
+		
 		int index = 0;
-		for( Entry<String,Hop> e : inputParameters.entrySet() ) 
-		{
+		for( Entry<String,Hop> e : inputParameters.entrySet() ) {
 			String s = e.getKey();
 			Hop input = e.getValue();
 			getInput().add(input);
 			input.getParent().add(this);
-
 			_paramIndexMap.put(s, index);
 			index++;
 		}
@@ -122,19 +120,6 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		return _paramIndexMap;
 	}
 	
-	/**
-	 * Returns a parameters by its name. Returns null if not present  
-	 * @param val name
-	 * @return high-level operator
-	 */
-	public Hop getInputParameter(String val){
-		Integer index = getParamIndexMap().get(val);
-		if (index == null)
-			return null;
-		else
-			return getInput().get(index);
-	}
-		
 	@Override
 	public String getOpString() {
 		return "" + _op;
@@ -144,24 +129,21 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		return _op;
 	}
 
-	public void setOutputEmptyBlocks(boolean flag)
-	{
+	public void setOutputEmptyBlocks(boolean flag) {
 		_outputEmptyBlocks = flag;
 	}
 	
-	public void setOutputPermutationMatrix(boolean flag)
-	{
+	public void setOutputPermutationMatrix(boolean flag) {
 		_outputPermutationMatrix = flag;
 	}
 	
 	public Hop getTargetHop() {
-		return _paramIndexMap.containsKey("target") ?   
-			getInput().get(_paramIndexMap.get("target")) : null;
+		return getParameterHop("target");
 	}
 	
 	public Hop getParameterHop(String name) {
-		return _paramIndexMap.containsKey(name) ?   
-			getInput().get(_paramIndexMap.get(name)) : null;	
+		return _paramIndexMap.containsKey(name) ?
+			getInput().get(_paramIndexMap.get(name)) : null;
 	}
 	
 	@Override
@@ -189,12 +171,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		
 		// construct lops for all input parameters
 		HashMap<String, Lop> inputlops = new HashMap<String, Lop>();
-		for (Entry<String, Integer> cur : _paramIndexMap.entrySet()) {
-			inputlops.put(cur.getKey(), getInput().get(cur.getValue())
-					.constructLops());
-		}
+		for (Entry<String, Integer> cur : _paramIndexMap.entrySet())
+			inputlops.put(cur.getKey(), getInput().get(cur.getValue()).constructLops());
 
-		switch( _op ) {		
+		switch( _op ) {
 			case GROUPEDAGG: { 
 				ExecType et = optFindExecType();
 				constructLopsGroupedAggregate(inputlops, et);
@@ -218,7 +198,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			case TRANSFORMDECODE: 
 			case TRANSFORMMETA: 
 			case TOSTRING: {
-				ExecType et = optFindExecType();			
+				ExecType et = optFindExecType();
 				ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops,
 						HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
 				setOutputDimensions(pbilop);
@@ -232,7 +212,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		
 		//add reblock/checkpoint lops if necessary
 		constructAndSetLopsDataFlowProperties();
-				
+		
 		return getLops();
 	}
 	
@@ -260,7 +240,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			else { //vector or matrix
 				outputDim1 = inDim2;
 				outputDim2 = ngroups;
-			}			
+			}
 		}
 		
 		//construct lops
@@ -300,8 +280,8 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
 			
 				if( groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) //mapgroupedagg
-					&& getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
-					&& ((LiteralOp)getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
+					&& getParameterHop(Statement.GAGG_FN) instanceof LiteralOp
+					&& ((LiteralOp)getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum")
 					&& inputlops.get(Statement.GAGG_NUM_GROUPS) != null ) 
 				{
 					//pre partitioning
@@ -326,7 +306,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					setLineNumbers(grp);
 					
 					Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
-					agg1.setupCorrectionLocation(CorrectionLocationType.NONE);  
+					agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
 					agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);			
 					grp_agg = agg1;
 					
@@ -338,7 +318,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 						|| target.getDim2()<=0  )                   // unkown
 					{
 						long m1_dim1 = target.getDim1();
-						long m1_dim2 = target.getDim2();		
+						long m1_dim2 = target.getDim2();
 						long m2_dim1 = groups.getDim1();
 						long m2_dim2 = groups.getDim2();
 						long m3_dim1 = m1_dim1; 
@@ -350,7 +330,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 						Lop offset = createOffsetLop(target, true); 
 						Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
 						setOutputDimensions(rep);
-						setLineNumbers(rep);	
+						setLineNumbers(rep);
 						
 						Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
 						group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
@@ -374,7 +354,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
 					inputlops.remove(Statement.GAGG_TARGET);
 					inputlops.remove(Statement.GAGG_GROUPS);
-
+					
 					grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
 					grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
 
@@ -392,31 +372,30 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			if( et == ExecType.CP) 
 			{
 				int k = OptimizerUtils.getConstrainedNumThreads( _maxNumThreads );
-				grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);						
+				grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
 				grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
 			}
 			else if(et == ExecType.SPARK) 
 			{
 				//physical operator selection
-				Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
+				Hop groups = getParameterHop(Statement.GAGG_GROUPS);
 				boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null &&
 						OptimizerUtils.checkSparkBroadcastMemoryBudget( groups.getDim1(), groups.getDim2(), 
 								groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()) );
 				
 				if( broadcastGroups //mapgroupedagg
-					&& getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
-					&& ((LiteralOp)getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
+					&& getParameterHop(Statement.GAGG_FN) instanceof LiteralOp
+					&& ((LiteralOp)getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum")
 					&& inputlops.get(Statement.GAGG_NUM_GROUPS) != null ) 
 				{
-					Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
-					
-					grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);						
+					Hop target = getTargetHop();
+					grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
 					grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
 					//no reblock required (directly output binary block)
 				}
 				else //groupedagg (w/ or w/o broadcast)
 				{
-					grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);						
+					grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
 					grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
 					setRequiresReblock( true );	
 				}
@@ -430,9 +409,9 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 	private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) 
 		throws HopsException, LopsException 
 	{
-		Hop targetHop = getInput().get(_paramIndexMap.get("target"));
-		Hop marginHop = getInput().get(_paramIndexMap.get("margin"));		
-		Hop selectHop = (_paramIndexMap.get("select") != null) ? getInput().get(_paramIndexMap.get("select")):null;
+		Hop targetHop = getTargetHop();
+		Hop marginHop = getParameterHop("margin");
+		Hop selectHop = getParameterHop("select");
 		
 		if( et == ExecType.CP || et == ExecType.CP_FILE )
 		{
@@ -591,12 +570,9 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 						emptyInd.setForcedExecType(ExecType.MR); //always MR
 						HopRewriteUtils.copyLineNumbers(this, emptyInd);
 					}
-				} else {
+				} 
+				else {
 					emptyInd = selectHop;
-					emptyInd.setOutputBlocksizes(brlen, bclen);
-					emptyInd.refreshSizeInformation();
-					emptyInd.setForcedExecType(ExecType.MR); //always MR
-					HopRewriteUtils.copyLineNumbers(this, emptyInd);
 				}
 				
 				//Step 2: compute row offsets for non-empty rows
@@ -612,7 +588,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				Hop cumsumOutput = cumsum;
 				if( !rmRows ){
 					cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
-					HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);	
+					HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
 				}
 				
 				Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol); //alternative: right indexing
@@ -688,13 +664,13 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					HopRewriteUtils.removeChildReference(ppred0, input);
 				
 				setLops(finalagg);
-			}	
+			}
 		}
 		else if( et == ExecType.SPARK )
 		{
 			if( !(marginHop instanceof LiteralOp) )
 				throw new HopsException("Parameter 'margin' must be a literal argument.");
-				
+			
 			Hop input = targetHop;
 			long rlen = input.getDim1();
 			long clen = input.getDim2();
@@ -717,12 +693,9 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows?Direction.Row:Direction.Col);
 					emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
 				}
-			} else {
+			} 
+			else {
 				emptyInd = selectHop;
-				emptyInd.setOutputBlocksizes(brlen, bclen);
-				emptyInd.refreshSizeInformation();
-				emptyInd.setForcedExecType(ExecType.SPARK); //always Spark
-				HopRewriteUtils.copyLineNumbers(this, emptyInd);
 			}
 			
 			//Step 2: compute row offsets for non-empty rows
@@ -764,12 +737,12 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			ParameterizedBuiltin pbilop = new ParameterizedBuiltin( inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);			
 			setOutputDimensions(pbilop);
 			setLineNumbers(pbilop);
-		
+			
 			//Step 4: cleanup hops (allow for garbage collection)
 			if(selectHop == null)
 				HopRewriteUtils.removeChildReference(ppred0, input);
 			
-			setLops(pbilop);	
+			setLops(pbilop);
 			
 			//NOTE: in contrast to mr, replication and aggregation handled instruction-local
 		}
@@ -820,11 +793,11 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			String sep = " ";
 			String linesep = "\n";
 			
-			Hop rowsHop = getInputParameter("rows");
-			Hop colsHop = getInputParameter("cols");
-			Hop sparsePrintHOP = getInputParameter("sparse");
-			Hop sepHop = getInputParameter("sep");
-			Hop linesepHop = getInputParameter("linesep");
+			Hop rowsHop = getParameterHop("rows");
+			Hop colsHop = getParameterHop("cols");
+			Hop sparsePrintHOP = getParameterHop("sparse");
+			Hop sepHop = getParameterHop("sep");
+			Hop linesepHop = getParameterHop("linesep");
 			
 			long numNonZeroes = getInput().get(0).getNnz();
 			if (numNonZeroes < 0)
@@ -836,7 +809,6 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			if (numCols < 0)	// If number of columns is not known, set to default
 				numCols = specifiedCols;
 			
-			
 			// Assume Defaults : 100 * 100, sep = " ", linesep = "\n", sparse = false
 			// String size in bytes is 36 + number_of_chars * 2
 			final long DEFAULT_SIZE = 36 + 2 *
@@ -911,7 +883,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		
 		if( _op == ParamBuiltinOp.RMEMPTY )
 		{ 
-			Hop marginHop = getInput().get(_paramIndexMap.get("margin"));
+			Hop marginHop = getParameterHop("margin");
 			boolean cols =  marginHop instanceof LiteralOp 
 					&& "cols".equals(((LiteralOp)marginHop).getStringValue());
 			
@@ -924,7 +896,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				
 				//removeEmpty-cols has additional memory requirements for intermediate 
 				//data structures in order to make this a cache-friendly operation.
-				ret += OptimizerUtils.INT_SIZE * dim2;				
+				ret += OptimizerUtils.INT_SIZE * dim2;
 			}
 			else //rows
 			{
@@ -934,7 +906,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		}
 		else if( _op == ParamBuiltinOp.REXPAND )
 		{
-			Hop dir = getInput().get(_paramIndexMap.get("dir"));
+			Hop dir = getParameterHop("dir");
 			String dirVal = ((LiteralOp)dir).getStringValue();
 			if( "rows".equals(dirVal) )
 			{
@@ -964,7 +936,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		{
 			// Get the number of groups provided as part of aggregate() invocation, whenever available.
 			if ( _paramIndexMap.get(Statement.GAGG_NUM_GROUPS) != null ) {
-				Hop ngroups = getInput().get(_paramIndexMap.get(Statement.GAGG_NUM_GROUPS));
+				Hop ngroups = getParameterHop(Statement.GAGG_NUM_GROUPS);
 				if(ngroups != null && ngroups instanceof LiteralOp) {
 					long m = HopRewriteUtils.getIntValueSafe((LiteralOp)ngroups);
 					long n = (mc.getRows()==1)?1:mc.getCols();
@@ -989,14 +961,14 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			// change (denser output).
 			if ( mc.dimsKnown() ) {
 				String margin = "rows";
-				Hop marginHop = getInput().get(_paramIndexMap.get("margin"));
+				Hop marginHop = getParameterHop("margin");
 				if(    marginHop instanceof LiteralOp 
 						&& "cols".equals(((LiteralOp)marginHop).getStringValue()) )
 					margin = new String("cols");
 				
 				MatrixCharacteristics mcSelect = null;
 				if (_paramIndexMap.get("select") != null) {
-					Hop select = getInput().get(_paramIndexMap.get("select"));	
+					Hop select = getParameterHop("select");
 					mcSelect = memo.getAllInputStats(select);
 				}
 
@@ -1028,8 +1000,8 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			//dimensions are exactly known from input, sparsity unknown but upper bounded by nrow(v)
 			//note: cannot infer exact sparsity due to missing cast for outer and potential cutoff for table
 			//but very good sparsity estimate possible (number of non-zeros in input)
-			Hop max = getInput().get(_paramIndexMap.get("max"));
-			Hop dir = getInput().get(_paramIndexMap.get("dir"));
+			Hop max = getParameterHop("max");
+			Hop dir = getParameterHop("dir");
 			long maxVal = computeDimParameterInformation(max, memo);
 			String dirVal = ((LiteralOp)dir).getStringValue();
 			if( mc.dimsKnown() ) {
@@ -1074,22 +1046,20 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 
 		ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
 		
-		if( _etypeForced != null ) 			
+		if( _etypeForced != null )
 		{
-			_etype = _etypeForced;	
+			_etype = _etypeForced;
 		}
 		else 
 		{
 			if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
 				_etype = findExecTypeByMemEstimate();
 			}
-			else if (   _op == ParamBuiltinOp.GROUPEDAGG 
-					 && this.getInput().get(0).areDimsBelowThreshold() ) 
-			{
+			else if (   _op == ParamBuiltinOp.GROUPEDAGG
+				&& getTargetHop().areDimsBelowThreshold() ) {
 				_etype = ExecType.CP;
 			}
-			else
-			{
+			else {
 				_etype = REMOTE;
 			}
 			
@@ -1102,7 +1072,8 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			|| _op == ParamBuiltinOp.TRANSFORMDECODE && REMOTE==ExecType.MR
 			|| _op == ParamBuiltinOp.TRANSFORMMETA 
 			|| _op == ParamBuiltinOp.TOSTRING 
-			|| _op == ParamBuiltinOp.CDF || _op == ParamBuiltinOp.INVCDF) {
+			|| _op == ParamBuiltinOp.CDF 
+			|| _op == ParamBuiltinOp.INVCDF) {
 			_etype = ExecType.CP;
 		}
 		
@@ -1126,13 +1097,13 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				// output dimension dim1 is completely data dependent 
 				long ldim1 = -1;
 				if ( _paramIndexMap.get(Statement.GAGG_NUM_GROUPS) != null ) {
-					Hop ngroups = getInput().get(_paramIndexMap.get(Statement.GAGG_NUM_GROUPS));
+					Hop ngroups = getParameterHop(Statement.GAGG_NUM_GROUPS);
 					if(ngroups != null && ngroups instanceof LiteralOp) {
 						ldim1 = HopRewriteUtils.getIntValueSafe((LiteralOp)ngroups);
 					}
 				}
 				
-				Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
+				Hop target = getTargetHop();
 				long ldim2 = (target.getDim1()==1)?1:target.getDim2(); 
 				
 				setDim1( ldim1 );
@@ -1141,8 +1112,8 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			}
 			case RMEMPTY: {
 				//one output dimension dim1 or dim2 is completely data dependent 
-				Hop target = getInput().get(_paramIndexMap.get("target"));
-				Hop margin = getInput().get(_paramIndexMap.get("margin"));
+				Hop target = getTargetHop();
+				Hop margin = getParameterHop("margin");
 				if( margin instanceof LiteralOp ) {
 					LiteralOp lmargin = (LiteralOp)margin;
 					if( "rows".equals(lmargin.getStringValue()) )
@@ -1155,7 +1126,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			}
 			case REPLACE: {
 				//dimensions are exactly known from input, sparsity might increase/decrease if pattern/replacement 0 
-				Hop target = getInput().get(_paramIndexMap.get("target"));
+				Hop target = getTargetHop();
 				setDim1( target.getDim1() );
 				setDim2( target.getDim2() );
 				if( isNonZeroReplaceArguments() )
@@ -1166,9 +1137,9 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			case REXPAND: {
 				//dimensions are exactly known from input, sparsity unknown but upper bounded by nrow(v)
 				//note: cannot infer exact sparsity due to missing cast for outer and potential cutoff for table
-				Hop target = getInput().get(_paramIndexMap.get("target"));
-				Hop max = getInput().get(_paramIndexMap.get("max"));
-				Hop dir = getInput().get(_paramIndexMap.get("dir"));
+				Hop target = getTargetHop();
+				Hop max = getParameterHop("max");
+				Hop dir = getParameterHop("dir");
 				double maxVal = computeSizeInformation(max);
 				String dirVal = ((LiteralOp)dir).getStringValue();
 				
@@ -1181,10 +1152,10 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 					setDim2(target.getDim1());
 				}
 				
-				break;	
+				break;
 			}
 			case TRANSFORMDECODE: {
-				Hop target = getInput().get(_paramIndexMap.get("target"));
+				Hop target = getTargetHop();
 				//rows remain unchanged for recoding and dummy coding
 				setDim1( target.getDim1() );
 				//cols remain unchanged only if no dummy coding
@@ -1276,17 +1247,13 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 	{
 		boolean ret = false;
 		
-		try
-		{
-			if( _op == ParamBuiltinOp.GROUPEDAGG )
-			{
-				int ix = _paramIndexMap.get(Statement.GAGG_FN);
-				Hop fnHop = getInput().get(ix);
+		try {
+			if( _op == ParamBuiltinOp.GROUPEDAGG ) {
+				Hop fnHop = getParameterHop(Statement.GAGG_FN);
 				ret = (fnHop instanceof LiteralOp && Statement.GAGG_FN_COUNT.equals(((LiteralOp)fnHop).getStringValue()) );
 			}
 		}
 		catch(Exception ex){
-			//silent false
 			LOG.warn("Check for count function failed, continue assuming false.", ex);
 		}
 		
@@ -1302,17 +1269,16 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		boolean ret = false;
 		try 
 		{
-			Hop pattern = getInput().get(_paramIndexMap.get("pattern"));
-			Hop replace = getInput().get(_paramIndexMap.get("replacement"));
+			Hop pattern = getParameterHop("pattern");
+			Hop replace = getParameterHop("replacement");
 			if( pattern instanceof LiteralOp && ((LiteralOp)pattern).getDoubleValue()!=0d &&
 			    replace instanceof LiteralOp && ((LiteralOp)replace).getDoubleValue()!=0d )
 			{
 				ret = true;
 			}
 		}
-		catch(Exception ex) 
-		{
-			LOG.warn(ex.getMessage());	
+		catch(Exception ex) {
+			LOG.warn(ex.getMessage());
 		}
 		
 		return ret;
@@ -1351,6 +1317,6 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 		}
 		
 		return ret;
-	}	
+	}
 	
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c145dbe3/src/test/java/org/apache/sysml/test/integration/functions/recompile/RemoveEmptyPotpourriTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/recompile/RemoveEmptyPotpourriTest.java b/src/test/java/org/apache/sysml/test/integration/functions/recompile/RemoveEmptyPotpourriTest.java
index bc206e1..31a6287 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/recompile/RemoveEmptyPotpourriTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/recompile/RemoveEmptyPotpourriTest.java
@@ -21,6 +21,7 @@ package org.apache.sysml.test.integration.functions.recompile;
 
 import java.util.HashMap;
 
+import org.junit.Assert;
 import org.junit.Test;
 
 import org.apache.sysml.hops.OptimizerUtils;
@@ -28,6 +29,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysml.test.integration.AutomatedTestBase;
 import org.apache.sysml.test.integration.TestConfiguration;
 import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
 
 /**
  * The main purpose of this test is to ensure that encountered and fixed
@@ -42,6 +44,7 @@ public class RemoveEmptyPotpourriTest extends AutomatedTestBase
 	private final static String TEST_NAME2 = "remove_empty_potpourri2";
 	private final static String TEST_NAME3 = "remove_empty_potpourri3";
 	private final static String TEST_NAME4 = "remove_empty_potpourri4";
+	private final static String TEST_NAME5 = "remove_empty_potpourri5";
 	
 	private final static String TEST_DIR = "functions/recompile/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + RemoveEmptyPotpourriTest.class.getSimpleName() + "/";
@@ -49,13 +52,13 @@ public class RemoveEmptyPotpourriTest extends AutomatedTestBase
 	
 	
 	@Override
-	public void setUp() 
-	{
+	public void setUp() {
 		TestUtils.clearAssertionInformation();
 		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }));
 		addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "R" }));
 		addTestConfiguration(TEST_NAME3, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3, new String[] { "R" }));
 		addTestConfiguration(TEST_NAME4, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME4, new String[] { "R" }));
+		addTestConfiguration(TEST_NAME5, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME5, new String[] { "R" }));
 	}
 	
 	@Test
@@ -97,6 +100,16 @@ public class RemoveEmptyPotpourriTest extends AutomatedTestBase
 	public void testRemoveEmptyComplexDagSplit2Rewrite() {
 		runRemoveEmptyTest(TEST_NAME4, true);
 	}
+	
+	@Test
+	public void testRemoveEmptyWithSelectRecompile() {
+		runRemoveEmptyTest(TEST_NAME5, false);
+	}
+	
+	@Test
+	public void testRemoveEmptyWithSelectRecompileRewrite() {
+		runRemoveEmptyTest(TEST_NAME5, true);
+	}
 
 	/**
 	 * 
@@ -125,7 +138,12 @@ public class RemoveEmptyPotpourriTest extends AutomatedTestBase
 			//compare matrices
 			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R");
 			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("R");
-			TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R");	
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R");
+			
+			if( TEST_NAME.equals(TEST_NAME5) ) {
+				Assert.assertTrue(Statistics.getNoOfExecutedMRJobs()==0);
+				Assert.assertTrue(Statistics.getNoOfExecutedSPInst()==0);
+			}
 		}
 		finally
 		{

http://git-wip-us.apache.org/repos/asf/systemml/blob/c145dbe3/src/test/scripts/functions/recompile/remove_empty_potpourri5.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri5.R b/src/test/scripts/functions/recompile/remove_empty_potpourri5.R
new file mode 100644
index 0000000..d03c7c3
--- /dev/null
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri5.R
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+X = matrix(1, 1000, 3);
+w = sum(X);
+Z = seq(1,w) + 7;
+
+writeMM(as(Z, "CsparseMatrix"), paste(args[1], "R", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/c145dbe3/src/test/scripts/functions/recompile/remove_empty_potpourri5.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/recompile/remove_empty_potpourri5.dml b/src/test/scripts/functions/recompile/remove_empty_potpourri5.dml
new file mode 100644
index 0000000..de02c8c
--- /dev/null
+++ b/src/test/scripts/functions/recompile/remove_empty_potpourri5.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(1, 1000, 3);
+w = sum(X);
+while(FALSE){}
+
+Y = seq(1,w) + 7;
+Z = removeEmpty (target=Y, margin="rows", select=Y);
+
+write(Z, $1);