You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/27 06:39:28 UTC

[1/2] systemml git commit: [SYSTEMML-1937] Increase IPA repetitions and new IPA fixpoint check

Repository: systemml
Updated Branches:
  refs/heads/master c5bec0082 -> 328e8a002


[SYSTEMML-1937] Increase IPA repetitions and new IPA fixpoint check 

This patch increases the number of IPA repetitions from 2 to 3 and
compares function call size summaries per repetition for early abort. On
a GLM binomial-probit scenario (100M x 10, 20/10 outer/inner iterations)
this patch improved performance from 496s to 337s.

Furthermore, this also includes a fix of test assertions for
SYSTEMML-1935 that did not show up in local runs (tightened conditions)
as well as additional minor simplifications of the GLM dml script.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/682fc445
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/682fc445
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/682fc445

Branch: refs/heads/master
Commit: 682fc4458fdefa727daa010bdc753cb5b22bb3b0
Parents: c5bec00
Author: Matthias Boehm <mb...@gmail.com>
Authored: Tue Sep 26 16:32:29 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Tue Sep 26 23:38:53 2017 -0700

----------------------------------------------------------------------
 scripts/algorithms/GLM.dml                      | 23 ++++------
 .../org/apache/sysml/hops/OptimizerUtils.java   | 18 +++-----
 .../sysml/hops/ipa/FunctionCallSizeInfo.java    | 12 +++++
 .../sysml/hops/ipa/InterProceduralAnalysis.java | 10 ++++
 .../org/apache/sysml/parser/DMLTranslator.java  |  2 +-
 ...antFoldingScalarVariablePropagationTest.java |  6 +--
 .../recompile/IPAConstantPropagationTest.java   | 48 ++++++--------------
 7 files changed, 56 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/scripts/algorithms/GLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM.dml b/scripts/algorithms/GLM.dml
index eab1256..0e31a1e 100644
--- a/scripts/algorithms/GLM.dml
+++ b/scripts/algorithms/GLM.dml
@@ -894,10 +894,9 @@ glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double]
     
         [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
         
-        if (isNaN == 0) {            
-            does_prob_contradict = (Y_prob <= 0);
-            if (sum (does_prob_contradict * abs (Y)) == 0) {
-                log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));
+        if (isNaN == 0) {
+            if( sum((Y_prob<=0) * abs(Y)) == 0 ) {
+                log_l = sum (Y * log (Y_prob * (1 - (Y_prob<=0)) + (Y_prob<=0)));
                 if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
                     isNaN = 1;
                 }
@@ -922,10 +921,6 @@ binomial_probability_two_column =
     # Define some auxiliary matrices
 
     ones_2 = matrix (1.0, rows = 1, cols = 2);
-    p_one_m_one = matrix("1 -1", 1, 2);
-    m_one_p_one = matrix("-1 1", 1, 2);
-    zero_one = matrix("0 1", 1, 2);
-    one_zero = matrix("1 0", 1, 2);
     zeros_r = matrix(0, nrow(linear_terms), 1);
     ones_r = matrix(1, nrow(linear_terms), 1);
 
@@ -934,11 +929,11 @@ binomial_probability_two_column =
     Y_prob = zeros_r %*% ones_2;
     if (link_type == 1) { # Binomial.power
         if          (link_power == 0) { # Binomial.log
-            Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    
+            Y_prob = cbind(exp(linear_terms), 1-exp(linear_terms));
         } else if (link_power == 0.5) { # Binomial.sqrt
-            Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    
+            Y_prob = cbind(linear_terms^2, 1-linear_terms^2);
         } else if (sum (linear_terms < 0) == 0) { # Binomial.power_nonlog
-            Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    
+            Y_prob = cbind(linear_terms^(1.0/link_power), 1-linear_terms^(1.0/link_power));
         } else {
             isNaN = 1;
         }
@@ -950,7 +945,7 @@ binomial_probability_two_column =
             Y_prob = cbind(exp(finite_linear_terms), ones_r);
             Y_prob = Y_prob / rowSums (Y_prob);
         } else if (link_type == 3)    { # Binomial.probit
-            lt_pos_neg = (finite_linear_terms >= 0) %*% p_one_m_one + ones_r %*% zero_one;
+            lt_pos_neg = cbind(finite_linear_terms>=0, finite_linear_terms<0);
             t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
             pt_gp = t_gp * ( 0.254829592 
                   + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
@@ -966,12 +961,12 @@ binomial_probability_two_column =
             Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
             Y_prob [, 2] = the_exp_exp;
         } else if (link_type == 5)    { # Binomial.cauchit
-            Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / 3.1415926535897932384626433832795;
+            Y_prob = 0.5 + cbind(atan(finite_linear_terms), -atan(finite_linear_terms)) / 3.1415926535897932384626433832795;
         } else {
             isNaN = 1;
         }
         Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
-}   }            
+}   }
 
 
 # THE CG-STEIHAUG PROCEDURE SCRIPT

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 82b1848..0215100 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -157,16 +157,12 @@ public class OptimizerUtils
 	public static boolean ALLOW_INTER_PROCEDURAL_ANALYSIS = true;
 
 	/**
-	 * Enables an additional "second chance" pass of static rewrites + IPA after the initial pass of
-	 * IPA.  Without this, there are many situations in which sizes will remain unknown even after
-	 * recompilation, thus leading to distributed ops.  With the second chance enabled, sizes in
-	 * these situations can be determined.  For example, the alternation of constant folding
-	 * (static rewrite) and scalar replacement (IPA) can allow for size propagation without dynamic
-	 * rewrites or recompilation due to replacement of scalars with literals during IPA, which
-	 * enables constant folding of sub-DAGs of literals during static rewrites, which in turn allows
-	 * for scalar propagation during IPA.
+	 * Number of inter-procedural analysis (IPA) repetitions. If set to >=2, we apply
+	 * IPA multiple times in order to allow scalar propagation over complex function call
+	 * graphs and various interactions between constant propagation, constant folding,
+	 * and other rewrites such as branch removal and the merge of statement block sequences.
 	 */
-	public static boolean ALLOW_IPA_SECOND_CHANCE = true;
+	public static int IPA_NUM_REPETITIONS = 3;
 
 	/**
 	 * Enables sum product rewrites such as mapmultchains. In the future, this will cover 
@@ -298,7 +294,7 @@ public class OptimizerUtils
 				ALLOW_ALGEBRAIC_SIMPLIFICATION = false;
 				ALLOW_AUTO_VECTORIZATION = false;
 				ALLOW_INTER_PROCEDURAL_ANALYSIS = false;
-				ALLOW_IPA_SECOND_CHANCE = false;
+				IPA_NUM_REPETITIONS = 1;
 				ALLOW_BRANCH_REMOVAL = false;
 				ALLOW_SUM_PRODUCT_REWRITES = false;
 				break;
@@ -310,7 +306,7 @@ public class OptimizerUtils
 				ALLOW_ALGEBRAIC_SIMPLIFICATION = false;
 				ALLOW_AUTO_VECTORIZATION = false;
 				ALLOW_INTER_PROCEDURAL_ANALYSIS = false;
-				ALLOW_IPA_SECOND_CHANCE = false;
+				IPA_NUM_REPETITIONS = 1;
 				ALLOW_BRANCH_REMOVAL = false;
 				ALLOW_SUM_PRODUCT_REWRITES = false;
 				ALLOW_LOOP_UPDATE_IN_PLACE = false;

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/main/java/org/apache/sysml/hops/ipa/FunctionCallSizeInfo.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ipa/FunctionCallSizeInfo.java b/src/main/java/org/apache/sysml/hops/ipa/FunctionCallSizeInfo.java
index b568f94..cea16e5 100644
--- a/src/main/java/org/apache/sysml/hops/ipa/FunctionCallSizeInfo.java
+++ b/src/main/java/org/apache/sysml/hops/ipa/FunctionCallSizeInfo.java
@@ -292,6 +292,18 @@ public class FunctionCallSizeInfo
 	}
 	
 	@Override
+	public boolean equals(Object o) {
+		if( o instanceof FunctionCallSizeInfo )
+			return false;
+		FunctionCallSizeInfo that = (FunctionCallSizeInfo)o;
+		return _fgraph == that._fgraph
+			&& _fcand.equals(that._fcand)
+			&& _fcandUnary.equals(that._fcandUnary)
+			&& _fcandSafeNNZ.entrySet().equals(that._fcandSafeNNZ.entrySet())
+			&& _fSafeLiterals.entrySet().equals(that._fSafeLiterals.entrySet());
+	}
+	
+	@Override
 	public String toString() {
 		StringBuilder sb = new StringBuilder();
 		

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
index 4a44317..024b456 100644
--- a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
+++ b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
@@ -171,6 +171,7 @@ public class InterProceduralAnalysis
 			throw new HopsException("Invalid number of IPA repetitions: " + repetitions);
 		
 		//perform number of requested IPA iterations
+		FunctionCallSizeInfo lastSizes = null;
 		for( int i=0; i<repetitions; i++ ) {
 			if( LOG.isDebugEnabled() )
 				LOG.debug("IPA: start IPA iteration " + (i+1) + "/" + repetitions +".");
@@ -200,6 +201,15 @@ public class InterProceduralAnalysis
 			for( IPAPass pass : _passes )
 				if( pass.isApplicable() )
 					pass.rewriteProgram(_prog, _fgraph, fcallSizes);
+			
+			//early abort without functions or on reached fixpoint
+			if( _fgraph.getReachableFunctions().isEmpty() 
+				|| (lastSizes != null && lastSizes.equals(fcallSizes)) ) {
+				if( LOG.isDebugEnabled() )
+					LOG.debug("IPA: Early abort after " + (i+1) + "/" + repetitions
+						+ " repetitions due to reached fixpoint.");
+				break;
+			}
 		}
 		
 		//cleanup pass: remove unused functions

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 577af90..40d0a1c 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -277,7 +277,7 @@ public class DMLTranslator
 		//propagate size information from main into functions (but conservatively)
 		if( OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS ) {
 			InterProceduralAnalysis ipa = new InterProceduralAnalysis(dmlp);
-			ipa.analyzeProgram(OptimizerUtils.ALLOW_IPA_SECOND_CHANCE ? 2 : 1);
+			ipa.analyzeProgram(OptimizerUtils.IPA_NUM_REPETITIONS);
 			resetHopsDAGVisitStatus(dmlp);
 		}
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/test/java/org/apache/sysml/test/integration/functions/misc/IPAConstantFoldingScalarVariablePropagationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/misc/IPAConstantFoldingScalarVariablePropagationTest.java b/src/test/java/org/apache/sysml/test/integration/functions/misc/IPAConstantFoldingScalarVariablePropagationTest.java
index e4ff6c3..51d049d 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/misc/IPAConstantFoldingScalarVariablePropagationTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/misc/IPAConstantFoldingScalarVariablePropagationTest.java
@@ -90,7 +90,7 @@ public class IPAConstantFoldingScalarVariablePropagationTest extends AutomatedTe
 	private void runIPAScalarVariablePropagationTest(String testname, boolean IPA_SECOND_CHANCE)
 	{
 		// Save old settings
-		boolean oldFlagIPASecondChance = OptimizerUtils.ALLOW_IPA_SECOND_CHANCE;
+		int oldIPANumRep = OptimizerUtils.IPA_NUM_REPETITIONS;
 		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
 		RUNTIME_PLATFORM platformOld = rtplatform;
 
@@ -102,7 +102,7 @@ public class IPAConstantFoldingScalarVariablePropagationTest extends AutomatedTe
 			String HOME = SCRIPT_DIR + TEST_DIR;
 			fullDMLScriptName = HOME + testname + ".dml";
 			programArgs = new String[]{"-stats", "-explain", "recompile_hops"};
-			OptimizerUtils.ALLOW_IPA_SECOND_CHANCE = IPA_SECOND_CHANCE;
+			OptimizerUtils.IPA_NUM_REPETITIONS = IPA_SECOND_CHANCE ? 2 : 1;
 			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
 			rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
 
@@ -116,7 +116,7 @@ public class IPAConstantFoldingScalarVariablePropagationTest extends AutomatedTe
 		}
 		finally {
 			// Reset
-			OptimizerUtils.ALLOW_IPA_SECOND_CHANCE = oldFlagIPASecondChance;
+			OptimizerUtils.IPA_NUM_REPETITIONS = oldIPANumRep;
 			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
 			rtplatform = platformOld;
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/682fc445/src/test/java/org/apache/sysml/test/integration/functions/recompile/IPAConstantPropagationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/recompile/IPAConstantPropagationTest.java b/src/test/java/org/apache/sysml/test/integration/functions/recompile/IPAConstantPropagationTest.java
index a8a4864..871d3f1 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/recompile/IPAConstantPropagationTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/recompile/IPAConstantPropagationTest.java
@@ -31,78 +31,60 @@ import org.apache.sysml.test.utils.TestUtils;
 
 public class IPAConstantPropagationTest extends AutomatedTestBase 
 {
-	
 	private final static String TEST_NAME1 = "constant_propagation_if";
 	private final static String TEST_NAME2 = "constant_propagation_while";
 	private final static String TEST_DIR = "functions/recompile/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + IPAConstantPropagationTest.class.getSimpleName() + "/";
 	
 	private final static int rows = 10;
-	private final static int cols = 15;    
-	
+	private final static int cols = 15;
 	
 	@Override
-	public void setUp() 
-	{
+	public void setUp() {
 		addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "X" }) );
 		addTestConfiguration( TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "X" }) );
 	}
 	
-	
 	@Test
-	public void testConstantPropagationNoUpdateNoBranchRemovalNoIPA() 
-	{
+	public void testConstantPropagationNoUpdateNoBranchRemovalNoIPA() {
 		runIPAConstantPropagationTest(false, false, false);
 	}
 	
 	@Test
-	public void testConstantPropagationNoUpdateNoBranchRemovalIPA() 
-	{
+	public void testConstantPropagationNoUpdateNoBranchRemovalIPA() {
 		runIPAConstantPropagationTest(false, false, true);
 	}
 	
 	@Test
-	public void testConstantPropagationNoUpdateBranchRemovalNoIPA() 
-	{
+	public void testConstantPropagationNoUpdateBranchRemovalNoIPA() {
 		runIPAConstantPropagationTest(false, true, false);
 	}
 	
 	@Test
-	public void testConstantPropagationNoUpdateBranchRemovalIPA() 
-	{
+	public void testConstantPropagationNoUpdateBranchRemovalIPA() {
 		runIPAConstantPropagationTest(false, true, true);
 	}
 	
 	@Test
-	public void testConstantPropagationUpdateNoBranchRemovalNoIPA() 
-	{
+	public void testConstantPropagationUpdateNoBranchRemovalNoIPA() {
 		runIPAConstantPropagationTest(true, false, false);
 	}
 	
 	@Test
-	public void testConstantPropagationUpdateNoBranchRemovalIPA() 
-	{
+	public void testConstantPropagationUpdateNoBranchRemovalIPA() {
 		runIPAConstantPropagationTest(true, false, true);
 	}
 	
 	@Test
-	public void testConstantPropagationUpdateBranchRemovalNoIPA() 
-	{
+	public void testConstantPropagationUpdateBranchRemovalNoIPA() {
 		runIPAConstantPropagationTest(true, true, false);
 	}
 	
 	@Test
-	public void testConstantPropagationUpdateBranchRemovalIPA() 
-	{
+	public void testConstantPropagationUpdateBranchRemovalIPA() {
 		runIPAConstantPropagationTest(true, true, true);
 	}
-
-	/**
-	 * 
-	 * @param condition
-	 * @param branchRemoval
-	 * @param IPA
-	 */
+	
 	private void runIPAConstantPropagationTest( boolean update, boolean branchRemoval, boolean IPA )
 	{	
 		boolean oldFlagBranchRemoval = OptimizerUtils.ALLOW_BRANCH_REMOVAL;
@@ -136,17 +118,15 @@ public class IPAConstantPropagationTest extends AutomatedTestBase
 			TestUtils.compareMatrices(dmlfile, rfile, 0, "Stat-DML", "Stat-R");
 			
 			//check expected number of compiled and executed MR jobs
-			int expectedNumCompiled = ( branchRemoval && IPA && !update ) ? 0 : 1; //rand
-			int expectedNumExecuted = 0;			
+			int expectedNumCompiled = ( branchRemoval && !update ) ? 0 : 1; //rand
+			int expectedNumExecuted = 0;
 			
 			checkNumCompiledMRJobs(expectedNumCompiled); 
 			checkNumExecutedMRJobs(expectedNumExecuted); 
 		}
-		finally
-		{
+		finally {
 			OptimizerUtils.ALLOW_BRANCH_REMOVAL = oldFlagBranchRemoval;
 			OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = oldFlagIPA;
 		}
 	}
-	
 }


[2/2] systemml git commit: [SYSTEMML-1933] Generalized codegen cbind handling (part 2), cleanups

Posted by mb...@apache.org.
[SYSTEMML-1933] Generalized codegen cbind handling (part 2), cleanups

This patch finalizes the codegen cbind generalization. We now do not
just fuse cbinds w/ constant vectors but arbitrary vector inputs. This
significantly extended its applicability and also revealed a number of
smaller robustness issues that needed fixing (e.g., row type selection,
row indexing on main input, switch from row to cell template). 

On GLM-probit (100M x 10, 20/10 iterations) this patch improved
end-to-end performance (w/ codegen) from 337s to 185s.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/328e8a00
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/328e8a00
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/328e8a00

Branch: refs/heads/master
Commit: 328e8a0020c17c072f13d9a1bc9334af968b9c2b
Parents: 682fc44
Author: Matthias Boehm <mb...@gmail.com>
Authored: Tue Sep 26 22:24:54 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Tue Sep 26 23:38:54 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/SpoofCompiler.java       |  3 +--
 .../sysml/hops/codegen/cplan/CNodeBinary.java   |  3 ++-
 .../sysml/hops/codegen/cplan/CNodeUnary.java    |  4 ++-
 .../sysml/hops/codegen/opt/PlanSelection.java   | 12 ++++++---
 .../opt/PlanSelectionFuseCostBasedV2.java       | 14 ++++++-----
 .../hops/codegen/template/TemplateRow.java      | 26 ++++++++++++++------
 .../hops/codegen/template/TemplateUtils.java    |  3 ++-
 7 files changed, 43 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 1db2910..a4a68bb 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -718,8 +718,7 @@ public class SpoofCompiler
 			//remove invalid row templates (e.g., unsatisfied blocksize constraint)
 			if( tpl instanceof CNodeRow ) {
 				//check for invalid row cplan over column vector
-				if(in1.getNumCols() == 1 || (((CNodeRow)tpl).getRowType()==RowType.NO_AGG
-					&& tpl.getOutput().getDataType().isScalar()) ) {
+				if( ((CNodeRow)tpl).getRowType()==RowType.NO_AGG && tpl.getOutput().getDataType().isScalar() ) {
 					cplans2.remove(e.getKey());
 					if( LOG.isTraceEnabled() )
 						LOG.trace("Removed invalid row cplan w/o agg on column vector.");

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index c2b5644..42a36ac 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -270,7 +270,8 @@ public class CNodeBinary extends CNode
 		
 		//generate binary operation (use sparse template, if data input)
 		boolean lsparse = sparse && (_inputs.get(0) instanceof CNodeData 
-			&& _inputs.get(0).getVarname().startsWith("a")
+			&& (_inputs.get(0).getVarname().startsWith("a")
+				|| _inputs.get(1).getVarname().startsWith("a"))
 			&& !_inputs.get(0).isLiteral());
 		boolean scalarInput = _inputs.get(0).getDataType().isScalar();
 		boolean scalarVector = (_inputs.get(0).getDataType().isScalar()

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index b3720dd..860d35a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -87,7 +87,9 @@ public class CNodeUnary extends CNode
 				case EXP:
 					return "    double %TMP% = FastMath.exp(%IN1%);\n";
 			    case LOOKUP_R:
-			    	return "    double %TMP% = getValue(%IN1%, rowIndex);\n";
+			    	return sparse ?
+			    		"    double %TMP% = getValue(%IN1v%, %IN1i%, ai, alen, 0);\n" :
+			    		"    double %TMP% = getValue(%IN1%, rowIndex);\n";
 			    case LOOKUP_C:
 			    	return "    double %TMP% = getValue(%IN1%, n, 0, colIndex);\n";
 			    case LOOKUP_RC:

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelection.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelection.java
index d18d156..21f4fd3 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelection.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelection.java
@@ -47,18 +47,22 @@ public abstract class PlanSelection
 	 * @param memo partial fusion plans P
 	 * @param roots entry points of HOP DAG G
 	 */
-	public abstract void selectPlans(CPlanMemoTable memo, ArrayList<Hop> roots);	
+	public abstract void selectPlans(CPlanMemoTable memo, ArrayList<Hop> roots);
 	
 	/**
-	 * Determines if the given partial fusion plan is valid.
+	 * Determines if the given partial fusion plan is a valid entry point
+	 * of a fused operator.
 	 * 
 	 * @param me memo table entry
 	 * @param hop current hop
 	 * @return true if entry is valid as top-level plan
 	 */
 	public static boolean isValid(MemoTableEntry me, Hop hop) {
-		return (me.type != TemplateType.OUTER //ROW, CELL, MAGG
-			|| (me.closed || HopRewriteUtils.isBinaryMatrixMatrixOperation(hop)));
+		return (me.type == TemplateType.CELL)
+			|| (me.type == TemplateType.MAGG)
+			|| (me.type == TemplateType.ROW && !HopRewriteUtils.isTransposeOperation(hop))
+			|| (me.type == TemplateType.OUTER 
+				&& (me.closed || HopRewriteUtils.isBinaryMatrixMatrixOperation(hop)));
 	}
 	
 	protected void addBestPlan(long hopID, MemoTableEntry me) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index 7c27dcf..8d1c4c0 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -43,6 +43,7 @@ import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.Direction;
+import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LiteralOp;
 import org.apache.sysml.hops.OptimizerUtils;
@@ -568,18 +569,18 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		}
 	}
 	
-	private static boolean isRowTemplateWithoutAgg(CPlanMemoTable memo, Hop current, HashSet<Long> visited) {
+	private static boolean isRowTemplateWithoutAggOrVects(CPlanMemoTable memo, Hop current, HashSet<Long> visited) {
 		//consider all aggregations other than root operation
 		MemoTableEntry me = memo.getBest(current.getHopID(), TemplateType.ROW);
 		boolean ret = true;
 		for(int i=0; i<3; i++)
 			if( me.isPlanRef(i) )
-				ret &= rIsRowTemplateWithoutAgg(memo, 
+				ret &= rIsRowTemplateWithoutAggOrVects(memo, 
 					current.getInput().get(i), visited);
 		return ret;
 	}
 	
-	private static boolean rIsRowTemplateWithoutAgg(CPlanMemoTable memo, Hop current, HashSet<Long> visited) {
+	private static boolean rIsRowTemplateWithoutAggOrVects(CPlanMemoTable memo, Hop current, HashSet<Long> visited) {
 		if( visited.contains(current.getHopID()) )
 			return true;
 		
@@ -587,8 +588,9 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		MemoTableEntry me = memo.getBest(current.getHopID(), TemplateType.ROW);
 		for(int i=0; i<3; i++)
 			if( me!=null && me.isPlanRef(i) )
-				ret &= rIsRowTemplateWithoutAgg(memo, current.getInput().get(i), visited);
-		ret &= !(current instanceof AggUnaryOp || current instanceof AggBinaryOp);
+				ret &= rIsRowTemplateWithoutAggOrVects(memo, current.getInput().get(i), visited);
+		ret &= !(current instanceof AggUnaryOp || current instanceof AggBinaryOp
+			|| HopRewriteUtils.isBinary(current, OpOp2.CBIND));
 		
 		visited.add(current.getHopID());
 		return ret;
@@ -628,7 +630,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 		for( Long hopID : part.getPartition() ) {
 			MemoTableEntry me = memo.getBest(hopID, TemplateType.ROW);
 			if( me != null && me.type == TemplateType.ROW && memo.contains(hopID, TemplateType.CELL)
-				&& isRowTemplateWithoutAgg(memo, memo.getHopRefs().get(hopID), new HashSet<Long>())) {
+				&& isRowTemplateWithoutAggOrVects(memo, memo.getHopRefs().get(hopID), new HashSet<Long>())) {
 				List<MemoTableEntry> blacklist = memo.get(hopID, TemplateType.ROW); 
 				memo.remove(memo.getHopRefs().get(hopID), new HashSet<MemoTableEntry>(blacklist));
 				if( LOG.isTraceEnabled() ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index d9209be..1aaa84f 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -78,7 +78,7 @@ public class TemplateRow extends TemplateBase
 		return (hop instanceof BinaryOp && hop.dimsKnown() && isValidBinaryOperation(hop)
 				&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1)
 			|| (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && hop.getInput().get(0).isMatrix()
-				&& HopRewriteUtils.isDataGenOpWithConstantValue(hop.getInput().get(1)))
+				&& hop.dimsKnown() && TemplateUtils.isColVector(hop.getInput().get(1)))
 			|| (hop instanceof AggBinaryOp && hop.dimsKnown() && hop.getDim2()==1 //MV
 				&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1)
 			|| (hop instanceof AggBinaryOp && hop.dimsKnown() && LibMatrixMult.isSkinnyRightHandSide(
@@ -101,9 +101,9 @@ public class TemplateRow extends TemplateBase
 		return !isClosed() && 
 			(  (hop instanceof BinaryOp && isValidBinaryOperation(hop) ) 
 			|| (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && hop.getInput().indexOf(input)==0
-				&& HopRewriteUtils.isDataGenOpWithConstantValue(hop.getInput().get(1)))
+				&& hop.dimsKnown() && TemplateUtils.isColVector(hop.getInput().get(1)))
 			|| ((hop instanceof UnaryOp || hop instanceof ParameterizedBuiltinOp) 
-					&& TemplateCell.isValidOperation(hop))
+				&& TemplateCell.isValidOperation(hop))
 			|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()!=Direction.RowCol
 				&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG))
 			|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection() == Direction.RowCol 
@@ -121,7 +121,9 @@ public class TemplateRow extends TemplateBase
 		//merge rowagg tpl with cell tpl if input is a vector
 		return !isClosed() &&
 			((hop instanceof BinaryOp && isValidBinaryOperation(hop)
-				&& hop.getDim1() > 1 && input.getDim1()>1) 
+				&& hop.getDim1() > 1 && input.getDim1()>1)
+			|| (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && hop.getInput().get(0).isMatrix()
+				&& hop.dimsKnown() && TemplateUtils.isColVector(hop.getInput().get(1)))
 			 ||(hop instanceof AggBinaryOp
 				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
 			 	&& (input.getDim2()==1 || (input==hop.getInput().get(1) 
@@ -184,6 +186,7 @@ public class TemplateRow extends TemplateBase
 		Hop[] sinHops = inHops.stream()
 			.filter(h -> !(h.getDataType().isScalar() && tmp.get(h.getHopID()).isLiteral()))
 			.sorted(new HopInputComparator(inHops2.get("X"),inHops2.get("B1"))).toArray(Hop[]::new);
+		inHops2.putIfAbsent("X", sinHops[0]); //robustness special cases
 		
 		//construct template node
 		ArrayList<CNode> inputs = new ArrayList<CNode>();
@@ -326,10 +329,19 @@ public class TemplateRow extends TemplateBase
 		{
 			//special case for cbind with zeros
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
-			CNode cdata2 = TemplateUtils.createCNodeData(
-				HopRewriteUtils.getDataGenOpConstantValue(hop.getInput().get(1)), true);
+			CNode cdata2 = null;
+			if( HopRewriteUtils.isDataGenOpWithConstantValue(hop.getInput().get(1)) ) {
+				cdata2 = TemplateUtils.createCNodeData(HopRewriteUtils
+					.getDataGenOpConstantValue(hop.getInput().get(1)), true);
+				inHops.remove(hop.getInput().get(1)); //rm 0-matrix
+			}
+			else {
+				cdata2 = tmp.get(hop.getInput().get(1).getHopID());
+				cdata2 = TemplateUtils.wrapLookupIfNecessary(cdata2, hop.getInput().get(1));
+			}
 			out = new CNodeBinary(cdata1, cdata2, BinType.VECT_CBIND);
-			inHops.remove(hop.getInput().get(1)); //rm 0-matrix
+			if( cdata1 instanceof CNodeData )
+				inHops2.put("X", hop.getInput().get(0));
 		}
 		else if(hop instanceof BinaryOp)
 		{

http://git-wip-us.apache.org/repos/asf/systemml/blob/328e8a00/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index 9d7baf9..21f44b2 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -235,7 +235,8 @@ public class TemplateUtils
 	}
 	
 	public static boolean isLookup(CNode node, boolean includeRC1) {
-		return isUnary(node, UnaryType.LOOKUP_R, UnaryType.LOOKUP_C, UnaryType.LOOKUP_RC)
+		return isUnary(node, UnaryType.LOOKUP_C, UnaryType.LOOKUP_RC)
+			|| (includeRC1 && isUnary(node, UnaryType.LOOKUP_R))
 			|| (includeRC1 && isTernary(node, TernaryType.LOOKUP_RC1));
 	}