You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/08/11 04:14:16 UTC

systemml git commit: [SYSTEMML-1834] Codegen plan enumeration with partial costing

Repository: systemml
Updated Branches:
  refs/heads/master b2700839b -> 16e803f2a


[SYSTEMML-1834] Codegen plan enumeration with partial costing

This PR adds conditions to the fusion plan enumerator to stop costing a
plan when its partially computed cost exceeds the cost of the best plan
found so far (an upper bound).

The Statistics class reports the number of partially costed plans
separately from the number of fully costed plans. I attempted to count
the number of Hops costed per plan in order to measure the effectiveness
of stopping costing early at a more fine-grained level, but this count
was inaccurate for some reason. On StratStats, costing can be stopped
early for 2301 out of 3742 plans.

Closes #611.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/16e803f2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/16e803f2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/16e803f2

Branch: refs/heads/master
Commit: 16e803f2a2ea160f99b709af0d0cecba053b8c4f
Parents: b270083
Author: Dylan Hutchison <dh...@cs.washington.edu>
Authored: Thu Aug 10 21:04:45 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Aug 10 21:04:46 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/SpoofCompiler.java       |   8 +-
 .../opt/PlanSelectionFuseCostBasedV2.java       | 123 +++++++++++--------
 .../hops/codegen/opt/ReachabilityGraph.java     |  10 +-
 .../java/org/apache/sysml/utils/Statistics.java |  43 +++++--
 src/test/config/SystemML-config.xml             |   2 +-
 5 files changed, 116 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/16e803f2/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 49a1686..ca3c233 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -558,10 +558,10 @@ public class SpoofCompiler
 		//generate cplan for existing memo table entry
 		if( memo.containsTopLevel(hop.getHopID()) ) {
 			cplans.put(hop.getHopID(), TemplateUtils
-				.createTemplate(memo.getBest(hop.getHopID()).type)
-				.constructCplan(hop, memo, compileLiterals));
-			if( DMLScript.STATISTICS )
-				Statistics.incrementCodegenCPlanCompile(1); 
+					.createTemplate(memo.getBest(hop.getHopID()).type)
+					.constructCplan(hop, memo, compileLiterals));
+			if (DMLScript.STATISTICS)
+				Statistics.incrementCodegenCPlanCompile(1);
 		}
 		
 		//process children recursively, but skip compiled operator

http://git-wip-us.apache.org/repos/asf/systemml/blob/16e803f2/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index e66c9c3..f818c06 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -25,11 +25,11 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.Map.Entry;
-import java.util.stream.Collectors;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map.Entry;
+import java.util.stream.Collectors;
 
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.ArrayUtils;
@@ -52,11 +52,11 @@ import org.apache.sysml.hops.TernaryOp;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.codegen.opt.ReachabilityGraph.SubProblem;
 import org.apache.sysml.hops.codegen.template.CPlanMemoTable;
+import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
+import org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType;
 import org.apache.sysml.hops.codegen.template.TemplateOuterProduct;
 import org.apache.sysml.hops.codegen.template.TemplateRow;
 import org.apache.sysml.hops.codegen.template.TemplateUtils;
-import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
-import org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
@@ -148,7 +148,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 				for( Long hopID : part.getPartition() )
 					memo.pruneRedundant(hopID, true, part.getMatPointsExt());
 			}
-			
+
 			//enumerate and cost plans, returns optional plan
 			boolean[] bestPlan = enumPlans(memo, part, costs, rgraph, 
 					part.getMatPointsExt(), 0, Double.MAX_VALUE);
@@ -188,20 +188,23 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 	 * @return optimal assignment of materialization points
 	 */
 	private static boolean[] enumPlans(CPlanMemoTable memo, PlanPartition part, StaticCosts costs, 
-		ReachabilityGraph rgraph, InterestingPoint[] matPoints, int off, double bestC) 
+		ReachabilityGraph rgraph, InterestingPoint[] matPoints, int off, double bestC)
 	{
 		//scan linearized search space, w/ skips for branch and bound pruning
 		//and structural pruning (where we solve conditionally independent problems)
 		//bestC is monotonically non-increasing and serves as the upper bound
-		long len = (long)Math.pow(2, matPoints.length-off);
+		long len = 1L << matPoints.length-off;
 		boolean[] bestPlan = null;
-		int numEvalPlans = 0;
-		
+		long numEvalPlans = 0, numEvalPartialPlans = 0, numSkipPlans = 0;
+
 		for( long i=0; i<len; i++ ) {
 			//construct assignment
 			boolean[] plan = createAssignment(matPoints.length-off, off, i);
+			if( bestPlan == null )
+				bestPlan = plan;
 			long pskip = 0; //skip after costing
-			
+
+
 			//skip plans with structural pruning
 			if( USE_STRUCTURAL_PRUNING && (rgraph!=null) && rgraph.isCutSet(plan) ) {
 				//compute skip (which also acts as boundary for subproblems)
@@ -222,43 +225,52 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 			}
 			//skip plans with branch and bound pruning (cost)
 			else if( USE_COST_PRUNING ) {
-				double lbC = Math.max(costs._read, costs._compute) + costs._write 
-					+ getMaterializationCost(part, matPoints, memo, plan);
+				double lbC = Math.max(costs._read, costs._compute) + costs._write
+						+ getMaterializationCost(part, matPoints, memo, plan);
 				if( lbC >= bestC ) {
 					long skip = getNumSkipPlans(plan);
 					if( LOG.isTraceEnabled() )
 						LOG.trace("Enum: Skip "+skip+" plans (by cost).");
 					i += skip - 1;
+					numSkipPlans += skip;
 					continue;
 				}
 			}
 			
-			//cost assignment on hops
-			double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts);
-			numEvalPlans ++;
-			if( LOG.isTraceEnabled() )
-				LOG.trace("Enum: "+Arrays.toString(plan)+" -> "+C);
-			
-			//cost comparisons
-			if( bestPlan == null || C < bestC ) {
-				bestC = C;
-				bestPlan = plan;
-				if( LOG.isTraceEnabled() )
-					LOG.trace("Enum: Found new best plan.");
+			//cost assignment on hops. Stop early if exceeds bestC.
+			double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, bestC);
+			if (LOG.isTraceEnabled())
+				LOG.trace("Enum: " + Arrays.toString(plan) + " -> " + C);
+			if( C == Double.POSITIVE_INFINITY ) {
+				numEvalPartialPlans++;
+			} else {
+				numEvalPlans++;
+				//cost comparisons
+				if( C < bestC ) {
+					bestC = C;
+					bestPlan = plan;
+					if( LOG.isTraceEnabled() )
+						LOG.trace("Enum: Found new best plan.");
+				}
 			}
-			
+
 			//post skipping
 			i += pskip;
+			numSkipPlans += pskip;
 			if( pskip !=0 && LOG.isTraceEnabled() )
 				LOG.trace("Enum: Skip "+pskip+" plans (by structure).");
 		}
 		
-		if( DMLScript.STATISTICS )
+		if( DMLScript.STATISTICS ) {
 			Statistics.incrementCodegenFPlanCompile(numEvalPlans);
+			Statistics.incrementCodegenFPlanPartialCost(numEvalPartialPlans);
+			Statistics.incrementCodegenFPlanSkip(numSkipPlans);
+		}
 		if( LOG.isTraceEnabled() )
 			LOG.trace("Enum: Optimal plan: "+Arrays.toString(bestPlan));
 		
 		//copy best plan w/o fixed offset plan
+		assert bestPlan != null;
 		return Arrays.copyOfRange(bestPlan, off, bestPlan.length);
 	}
 	
@@ -267,15 +279,16 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		Arrays.fill(ret, 0, off, true);
 		long tmp = pos;
 		for( int i=0; i<len; i++ ) {
-			ret[off+i] = (tmp >= Math.pow(2, len-i-1));
-			tmp %= Math.pow(2, len-i-1);
+			long mask = 1L << len-i-1;
+			ret[off+i] = tmp >= mask;
+			tmp %= mask;
 		}
 		return ret;	
 	}
 	
 	private static long getNumSkipPlans(boolean[] plan) {
 		int pos = ArrayUtils.lastIndexOf(plan, true);
-		return (long) Math.pow(2, plan.length-pos-1);
+		return 1L << plan.length-pos-1;
 	}
 	
 	private static double getMaterializationCost(PlanPartition part, InterestingPoint[] M, CPlanMemoTable memo, boolean[] plan) {
@@ -757,7 +770,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 	//////////
 	
 	private static double getPlanCost(CPlanMemoTable memo, PlanPartition part, 
-		InterestingPoint[] matPoints,boolean[] plan, HashMap<Long, Double> computeCosts) 
+			InterestingPoint[] matPoints,boolean[] plan, HashMap<Long, Double> computeCosts,
+			final double bestC)
 	{
 		//high level heuristic: every hop or fused operator has the following cost: 
 		//WRITE + max(COMPUTE, READ), where WRITE costs are given by the output size, 
@@ -766,17 +780,25 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		
 		HashSet<VisitMarkCost> visited = new HashSet<>();
 		double costs = 0;
+		int rem = part.getRoots().size();
 		for( Long hopID : part.getRoots() ) {
 			costs += rGetPlanCosts(memo, memo.getHopRefs().get(hopID), 
-				visited, part, matPoints, plan, computeCosts, null, null);
+				visited, part, matPoints, plan, computeCosts, null, null, bestC - costs);
+			rem--;
+			// stop early if we exceed bestC
+			if( costs >= bestC && rem > 0 ) {
+				costs = Double.POSITIVE_INFINITY;
+				break;
+			}
 		}
 		return costs;
 	}
 	
-	private static double rGetPlanCosts(CPlanMemoTable memo, Hop current, HashSet<VisitMarkCost> visited, 
-		PlanPartition part, InterestingPoint[] matPoints, boolean[] plan, HashMap<Long, Double> computeCosts, 
-		CostVector costsCurrent, TemplateType currentType) 
+	private static double rGetPlanCosts(CPlanMemoTable memo, final Hop current, HashSet<VisitMarkCost> visited,
+			PlanPartition part, InterestingPoint[] matPoints, boolean[] plan, HashMap<Long, Double> computeCosts,
+			CostVector costsCurrent, TemplateType currentType, final double costBudget)
 	{
+		final long currentHopId = current.getHopID();
 		//memoization per hop id and cost vector to account for redundant
 		//computation without double counting materialized results or compute
 		//costs of complex operation DAGs within a single fused operator
@@ -788,21 +810,20 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		//under awareness of current plan choice
 		MemoTableEntry best = null;
 		boolean opened = (currentType == null);
-		if( memo.contains(current.getHopID()) ) {
+		if( memo.contains(currentHopId) ) {
 			//note: this is the inner loop of plan enumeration and hence, we do not 
 			//use streams, lambda expressions, etc to avoid unnecessary overhead
-			long hopID = current.getHopID();
 			if( currentType == null ) {
-				for( MemoTableEntry me : memo.get(hopID) )
+				for( MemoTableEntry me : memo.get(currentHopId) )
 					best = isValid(me, current) 
-						&& hasNoRefToMatPoint(hopID, me, matPoints, plan) 
+						&& hasNoRefToMatPoint(currentHopId, me, matPoints, plan)
 						&& BasicPlanComparator.icompare(me, best)<0 ? me : best;
 				opened = true;
 			}
 			else {
-				for( MemoTableEntry me : memo.get(hopID) )
+				for( MemoTableEntry me : memo.get(currentHopId) )
 					best = (me.type == currentType || me.type==TemplateType.CELL)
-						&& hasNoRefToMatPoint(hopID, me, matPoints, plan) 
+						&& hasNoRefToMatPoint(currentHopId, me, matPoints, plan)
 						&& TypedPlanComparator.icompare(me, best, currentType)<0 ? me : best;
 			}
 		}
@@ -814,11 +835,13 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		//add other roots for multi-agg template to account for shared costs
 		if( opened && best != null && best.type == TemplateType.MAGG ) {
 			//account costs to first multi-agg root 
-			if( best.input1 == current.getHopID() )
+			if( best.input1 == currentHopId )
 				for( int i=1; i<3; i++ ) {
 					if( !best.isPlanRef(i) ) continue;
 					costs += rGetPlanCosts(memo, memo.getHopRefs().get(best.input(i)), visited, 
-						part, matPoints, plan, computeCosts, costVect, TemplateType.MAGG);
+						part, matPoints, plan, computeCosts, costVect, TemplateType.MAGG, costBudget - costs);
+					if( costs >= costBudget )
+						return Double.POSITIVE_INFINITY;
 				}
 			//skip other multi-agg roots
 			else
@@ -826,28 +849,32 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		}
 		
 		//add compute costs of current operator to costs vector
-		costVect.computeCosts += computeCosts.get(current.getHopID());
+		costVect.computeCosts += computeCosts.get(currentHopId);
 		
 		//process children recursively
 		for( int i=0; i< current.getInput().size(); i++ ) {
 			Hop c = current.getInput().get(i);
 			if( best!=null && best.isPlanRef(i) )
-				costs += rGetPlanCosts(memo, c, visited, part, matPoints, plan, computeCosts, costVect, best.type);
+				costs += rGetPlanCosts(memo, c, visited, part, matPoints,
+						plan, computeCosts, costVect, best.type, costBudget - costs);
 			else if( best!=null && isImplicitlyFused(current, i, best.type) )
 				costVect.addInputSize(c.getInput().get(0).getHopID(), getSize(c));
 			else { //include children and I/O costs
 				if( part.getPartition().contains(c.getHopID()) )
-					costs += rGetPlanCosts(memo, c, visited, part, matPoints, plan, computeCosts, null, null);
+					costs += rGetPlanCosts(memo, c, visited, part, matPoints,
+						plan, computeCosts, null, null, costBudget - costs);
 				if( costVect != null && c.getDataType().isMatrix() )
 					costVect.addInputSize(c.getHopID(), getSize(c));
 			}
+			if( costs >= costBudget )
+				return Double.POSITIVE_INFINITY;
 		}
 		
 		//add costs for opened fused operator
 		if( opened ) {
 			if( LOG.isTraceEnabled() ) {
 				String type = (best !=null) ? best.type.name() : "HOP";
-				LOG.trace("Cost vector ("+type+" "+current.getHopID()+"): "+costVect);
+				LOG.trace("Cost vector ("+type+" "+currentHopId+"): "+costVect);
 			}
 			double tmpCosts = costVect.outSize * 8 / WRITE_BANDWIDTH //time for output write
 				+ Math.max(costVect.getSumInputSizes() * 8 / READ_BANDWIDTH,
@@ -861,8 +888,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		}
 		//add costs for non-partition read in the middle of fused operator
 		else if( part.getExtConsumed().contains(current.getHopID()) ) {
-			costs += rGetPlanCosts(memo, current, visited,
-				part, matPoints, plan, computeCosts, null, null);
+			costs += rGetPlanCosts(memo, current, visited, part, matPoints, plan,
+				computeCosts, null, null, costBudget - costs);
 		}
 		
 		//sanity check non-negative costs

http://git-wip-us.apache.org/repos/asf/systemml/blob/16e803f2/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
index de1ed92..fb7c8d9 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
@@ -175,7 +175,7 @@ public class ReachabilityGraph
 		for( CutSet cs : _cutSets )
 			if( isCutSet(cs, plan) ) {
 				int pos = cs.posCut[cs.posCut.length-1];				
-				return (long) Math.pow(2, plan.length-pos-1);
+				return 1L << plan.length-pos-1;
 			}
 		throw new RuntimeException("Failed to compute "
 			+ "number of skip plans for plan without cutset.");
@@ -240,11 +240,11 @@ public class ReachabilityGraph
 			if( !CollectionUtils.containsAny(part1, part2) 
 				&& !part1.isEmpty() && !part2.isEmpty()) {
 				//score cutsets (smaller is better)
-				double base = Math.pow(2, _matPoints.size());
-				double numComb = Math.pow(2, cand.size());
+				double base = 1L << _matPoints.size();
+				double numComb = 1L << cand.size();
 				double score = (numComb-1)/numComb * base
-					+ 1/numComb * Math.pow(2, part1.size())
-					+ 1/numComb * Math.pow(2, part2.size());
+					+ 1/numComb * (1L << part1.size())
+					+ 1/numComb * (1L << part2.size());
 				
 				//construct cutset
 				cutSets.add(Pair.of(new CutSet(

http://git-wip-us.apache.org/repos/asf/systemml/blob/16e803f2/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java
index 8891a6c..b294ba6 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -77,6 +77,8 @@ public class Statistics
 	private static final LongAdder codegenClassCompileTime = new LongAdder(); //in nano
 	private static final LongAdder codegenHopCompile = new LongAdder(); //count
 	private static final LongAdder codegenFPlanCompile = new LongAdder(); //count
+	private static final LongAdder codegenFPlanPartialCost = new LongAdder(); //count
+	private static final LongAdder codegenFPlanSkip = new LongAdder(); //count
 	private static final LongAdder codegenCPlanCompile = new LongAdder(); //count
 	private static final LongAdder codegenClassCompile = new LongAdder(); //count
 	private static final LongAdder codegenPlanCacheHits = new LongAdder(); //count
@@ -260,6 +262,12 @@ public class Statistics
 	public static void incrementCodegenFPlanCompile(long delta) {
 		codegenFPlanCompile.add(delta);
 	}
+	public static void incrementCodegenFPlanPartialCost(long delta) {
+		codegenFPlanPartialCost.add(delta);
+	}
+	public static void incrementCodegenFPlanSkip(long delta) {
+		codegenFPlanSkip.add(delta);
+	}
 	
 	public static void incrementCodegenClassCompile() {
 		codegenClassCompile.increment();
@@ -292,6 +300,12 @@ public class Statistics
 	public static long getCodegenFPlanCompile() {
 		return codegenFPlanCompile.longValue();
 	}
+	public static long getCodegenFPlanPartialCost() {
+		return codegenFPlanPartialCost.longValue();
+	}
+	public static long getCodegenFPlanSkip() {
+		return codegenFPlanSkip.longValue();
+	}
 	
 	public static long getCodegenClassCompile() {
 		return codegenClassCompile.longValue();
@@ -387,6 +401,8 @@ public class Statistics
 		
 		codegenHopCompile.reset();
 		codegenFPlanCompile.reset();
+		codegenFPlanPartialCost.reset();
+		codegenFPlanSkip.reset();
 		codegenCPlanCompile.reset();
 		codegenClassCompile.reset();
 		codegenCompileTime.reset();
@@ -725,10 +741,10 @@ public class Statistics
 		
 		sb.append("SystemML Statistics:\n");
 		if( DMLScript.STATISTICS ) {
-			sb.append("Total elapsed time:\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec
-			sb.append("Total compilation time:\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec
+			sb.append("Total elapsed time:\t\t\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec
+			sb.append("Total compilation time:\t\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec
 		}
-		sb.append("Total execution time:\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec
+		sb.append("Total execution time:\t\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec
 		if( OptimizerUtils.isSparkExecutionMode() ) {
 			if( DMLScript.STATISTICS ) //moved into stats on Shiv's request
 				sb.append("Number of compiled Spark inst:\t" + getNoOfCompiledSPInst() + ".\n");
@@ -764,26 +780,29 @@ public class Statistics
 			}
 			
 			sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + CacheStatistics.displayHits() + ".\n");
-			sb.append("Cache writes (WB, FS, HDFS):\t" + CacheStatistics.displayWrites() + ".\n");
+			sb.append("Cache writes (WB, FS, HDFS):   \t" + CacheStatistics.displayWrites() + ".\n");
 			sb.append("Cache times (ACQr/m, RLS, EXP):\t" + CacheStatistics.displayTime() + " sec.\n");
 			sb.append("HOP DAGs recompiled (PRED, SB):\t" + getHopRecompiledPredDAGs() + "/" + getHopRecompiledSBDAGs() + ".\n");
-			sb.append("HOP DAGs recompile time:\t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
+			sb.append("HOP DAGs recompile time:       \t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
 			if( getFunRecompiles()>0 ) {
 				sb.append("Functions recompiled:\t\t" + getFunRecompiles() + ".\n");
 				sb.append("Functions recompile time:\t" + String.format("%.3f", ((double)getFunRecompileTime())/1000000000) + " sec.\n");	
 			}
 			if( ConfigurationManager.isCodegenEnabled() ) {
-				sb.append("Codegen compile (DAG,FP,CP,JC):\t" + getCodegenDAGCompile() + "/" + getCodegenFPlanCompile() 
-					+ "/" + getCodegenCPlanCompile() + "/" + getCodegenClassCompile() + ".\n");
+				sb.append("Codegen compile    (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/"
+						+ getCodegenCPlanCompile() + "/" + getCodegenClassCompile() + ".\n");
+				sb.append("Codegen enum  (full,part,skip):\t" + getCodegenFPlanCompile() + "/"
+						+ getCodegenFPlanPartialCost() + "/"
+						+ getCodegenFPlanSkip() + ".\n");
 				sb.append("Codegen compile times (DAG,JC):\t" + String.format("%.3f", (double)getCodegenCompileTime()/1000000000) + "/" + 
 						String.format("%.3f", (double)getCodegenClassCompileTime()/1000000000)  + " sec.\n");
-				sb.append("Codegen plan cache hits:\t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n");
+				sb.append("Codegen plan cache hits:       \t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n");
 			}
 			if( OptimizerUtils.isSparkExecutionMode() ){
 				String lazy = SparkExecutionContext.isLazySparkContextCreation() ? "(lazy)" : "(eager)";
 				sb.append("Spark ctx create time "+lazy+":\t"+
 						String.format("%.3f", ((double)sparkCtxCreateTime)*1e-9)  + " sec.\n" ); // nanoSec --> sec
-				sb.append("Spark trans counts (par,bc,col):" +
+				sb.append("Spark trans counts(par,bc,col):\t" +
 						String.format("%d/%d/%d.\n", sparkParallelizeCount.longValue(), 
 								sparkBroadcastCount.longValue(), sparkCollectCount.longValue()));
 				sb.append("Spark trans times (par,bc,col):\t" +
@@ -800,9 +819,9 @@ public class Statistics
 				sb.append("ParFor total update in-place:\t" + lTotalUIPVar + "/" + lTotalLixUIP + "/" + lTotalLix + "\n");
 			}
 
-			sb.append("Total JIT compile time:\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n");
-			sb.append("Total JVM GC count:\t\t" + getJVMgcCount() + ".\n");
-			sb.append("Total JVM GC time:\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n");
+			sb.append("Total JIT compile time:\t\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n");
+			sb.append("Total JVM GC count:\t\t\t\t" + getJVMgcCount() + ".\n");
+			sb.append("Total JVM GC time:\t\t\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n");
 			LibMatrixDNN.appendStatistics(sb);
 			sb.append("Heavy hitter instructions:\n" + getHeavyHitters(maxHeavyHitters));
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/16e803f2/src/test/config/SystemML-config.xml
----------------------------------------------------------------------
diff --git a/src/test/config/SystemML-config.xml b/src/test/config/SystemML-config.xml
index 9b52a6d..3b25d99 100644
--- a/src/test/config/SystemML-config.xml
+++ b/src/test/config/SystemML-config.xml
@@ -25,7 +25,7 @@
    <scratch>scratch_space</scratch> 
 
    <!-- compiler optimization level, valid values: 0 | 1 | 2 | 3 | 4, default: 2 -->
-   <optlevel>2</optlevel>  
+   <optlevel>2</optlevel>
 
    <!-- default number of reduce tasks per MR job, default: 2 x number of nodes -->
    <numreducers>10</numreducers>