You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/04/08 04:22:57 UTC

[1/2] incubator-systemml git commit: [SYSTEMML-1448] Support rowMins/rowMaxs in codegen row aggregates

Repository: incubator-systemml
Updated Branches:
  refs/heads/master de270219f -> 5de7beea2


[SYSTEMML-1448] Support rowMins/rowMaxs in codegen row aggregates

This patch extends the codegen compiler with rowMins and rowMaxs for row
aggregate templates and introduces the related runtime primitives.
Furthermore, this patch also fixes various existing primitives, such as
sparse rowSums and dense vector comparisons.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/18ab98a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/18ab98a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/18ab98a6

Branch: refs/heads/master
Commit: 18ab98a6e2083551618f4bf62dba3a19574b115b
Parents: de27021
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Apr 7 19:45:41 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Apr 7 19:45:41 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/cplan/CNodeUnary.java    | 15 ++++-
 .../hops/codegen/template/TemplateRowAgg.java   | 10 +++-
 .../runtime/codegen/LibSpoofPrimitives.java     | 60 ++++++++++++++------
 .../functions/codegen/AlgorithmLinregCG.java    |  4 +-
 .../functions/codegen/RowAggTmplTest.java       | 13 ++++-
 .../scripts/functions/codegen/rowAggPattern10.R | 34 +++++++++++
 .../functions/codegen/rowAggPattern10.dml       | 28 +++++++++
 7 files changed, 139 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index 9d3a877..262295c 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -21,13 +21,15 @@ package org.apache.sysml.hops.codegen.cplan;
 
 import java.util.Arrays;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.sysml.parser.Expression.DataType;
 
 
 public class CNodeUnary extends CNode
 {
 	public enum UnaryType {
-		ROW_SUMS, LOOKUP_R, LOOKUP_RC, LOOKUP0, //codegen specific
+		LOOKUP_R, LOOKUP_RC, LOOKUP0, //codegen specific
+		ROW_SUMS, ROW_MINS, ROW_MAXS, //codegen specific
 		EXP, POW2, MULT2, SQRT, LOG, LOG_NZ,
 		ABS, ROUND, CEIL, FLOOR, SIGN, 
 		SIN, COS, TAN, ASIN, ACOS, ATAN,
@@ -43,8 +45,11 @@ public class CNodeUnary extends CNode
 		public String getTemplate(boolean sparse) {
 			switch( this ) {
 				case ROW_SUMS:
-					return sparse ? "    double %TMP% = LibSpoofPrimitives.vectSum(%IN1v%, %IN1i%, %POS1%, %LEN%);\n": 
-									"    double %TMP% = LibSpoofPrimitives.vectSum(%IN1%, %POS1%, %LEN%);\n"; 
+				case ROW_MINS:
+				case ROW_MAXS:
+					String vectName = StringUtils.capitalize(this.toString().substring(4,7).toLowerCase());
+					return sparse ? "    double %TMP% = LibSpoofPrimitives.vect"+vectName+"(%IN1v%, %IN1i%, %POS1%, %LEN%);\n": 
+									"    double %TMP% = LibSpoofPrimitives.vect"+vectName+"(%IN1%, %POS1%, %LEN%);\n"; 
 				case EXP:
 					return "    double %TMP% = FastMath.exp(%IN1%);\n";
 			    case LOOKUP_R:
@@ -153,6 +158,8 @@ public class CNodeUnary extends CNode
 	public String toString() {
 		switch(_type) {
 			case ROW_SUMS:  return "u(R+)";
+			case ROW_MINS:  return "u(Rmin)";
+			case ROW_MAXS:  return "u(Rmax)";
 			case LOOKUP_R:	return "u(ixr)";
 			case LOOKUP_RC:	return "u(ixrc)";
 			case LOOKUP0:	return "u(ix0)";
@@ -165,6 +172,8 @@ public class CNodeUnary extends CNode
 	public void setOutputDims() {
 		switch(_type) {
 			case ROW_SUMS:
+			case ROW_MINS:
+			case ROW_MAXS:
 			case EXP:
 			case LOOKUP_R:
 			case LOOKUP_RC:

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java
index 63f7cc6..49d0cb8 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java
@@ -55,6 +55,7 @@ import org.apache.sysml.runtime.matrix.data.Pair;
 
 public class TemplateRowAgg extends TemplateBase 
 {
+	private static final Hop.AggOp[] SUPPORTED_ROW_AGG = new AggOp[]{AggOp.SUM, AggOp.MIN, AggOp.MAX};
 	private static final Hop.OpOp2[] SUPPORTED_VECT_BINARY = new OpOp2[]{OpOp2.MULT, OpOp2.DIV, 
 			OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.LESS, OpOp2.LESSEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL};
 	
@@ -157,11 +158,12 @@ public class TemplateRowAgg extends TemplateBase
 		if(hop instanceof AggUnaryOp)
 		{
 			CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
-			if(  ((AggUnaryOp)hop).getDirection() == Direction.Row && ((AggUnaryOp)hop).getOp() == AggOp.SUM  ) {
+			if( ((AggUnaryOp)hop).getDirection() == Direction.Row && HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG) ) {
 				if(hop.getInput().get(0).getDim2()==1)
 					out = (cdata1.getDataType()==DataType.SCALAR) ? cdata1 : new CNodeUnary(cdata1,UnaryType.LOOKUP_R);
 				else {
-					out = new CNodeUnary(cdata1, UnaryType.ROW_SUMS);
+					String opcode = "ROW_"+((AggUnaryOp)hop).getOp().name().toUpperCase()+"S";
+					out = new CNodeUnary(cdata1, UnaryType.valueOf(opcode));
 					inHops2.put("X", hop.getInput().get(0));
 				}
 			}
@@ -284,6 +286,10 @@ public class TemplateRowAgg extends TemplateBase
 					TernaryType.LOOKUP_RC1);
 		}
 		
+		if( out == null ) {
+			throw new RuntimeException(hop.getHopID()+" "+hop.getOpString());
+		}
+		
 		if( out.getDataType().isMatrix() ) {
 			out.setNumRows(hop.getDim1());
 			out.setNumCols(hop.getDim2());

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index dfe3244..6907b0b 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -74,7 +74,7 @@ public class LibSpoofPrimitives
 		return c;
 	}
 
-	// custom vector sums
+	// custom vector sums, mins, maxs
 	
 	/**
 	 * Computes c = sum(A), where A is a dense vectors. 
@@ -89,14 +89,14 @@ public class LibSpoofPrimitives
 		final int bn = len%8;
 				
 		//compute rest
-		for( int i = 0; i < bn; i++, ai++ )
-			val += a[ ai ];
+		for( int i = ai; i < ai+bn; i++ )
+			val += a[ i ];
 		
 		//unrolled 8-block (for better instruction-level parallelism)
-		for( int i = bn; i < len; i+=8, ai+=8 ) {
+		for( int i = ai+bn; i < ai+len; i+=8 ) {
 			//read 64B cacheline of a, compute cval' = sum(a) + cval
-			val += a[ ai+0 ] + a[ ai+1 ] + a[ ai+2 ] + a[ ai+3 ]
-			     + a[ ai+4 ] + a[ ai+5 ] + a[ ai+6 ] + a[ ai+7 ];
+			val += a[ i+0 ] + a[ i+1 ] + a[ i+2 ] + a[ i+3 ]
+			     + a[ i+4 ] + a[ i+5 ] + a[ i+6 ] + a[ i+7 ];
 		}
 		
 		//scalar result
@@ -118,23 +118,51 @@ public class LibSpoofPrimitives
 				
 		//compute rest
 		for( int i = ai; i < ai+bn; i++ )
-			val += avals[ ai+aix[i] ];
+			val += avals[ aix[i] ];
 		
 		//unrolled 8-block (for better instruction-level parallelism)
 		for( int i = ai+bn; i < ai+len; i+=8 )
 		{
 			//read 64B of a via 'gather'
 			//compute cval' = sum(a) + cval
-			val += avals[ ai+aix[i+0] ] + avals[ ai+aix[i+1] ]
-			     + avals[ ai+aix[i+2] ] + avals[ ai+aix[i+3] ]
-			     + avals[ ai+aix[i+4] ] + avals[ ai+aix[i+5] ]
-			     + avals[ ai+aix[i+6] ] + avals[ ai+aix[i+7] ];
+			val += avals[ aix[i+0] ] + avals[ aix[i+1] ]
+			     + avals[ aix[i+2] ] + avals[ aix[i+3] ]
+			     + avals[ aix[i+4] ] + avals[ aix[i+5] ]
+			     + avals[ aix[i+6] ] + avals[ aix[i+7] ];
 		}
 		
 		//scalar result
 		return val; 
+	}
+	
+	public static double vectMin(double[] a, int ai, int len) { 
+		double val = Double.MAX_VALUE;
+		for( int i = ai; i < ai+len; i++ )
+			val = Math.min(a[ai], val);
+		return val; 
 	} 
 	
+	public static double vectMin(double[] avals, int[] aix, int ai, int len) {
+		double val = Double.MAX_VALUE;
+		for( int i = ai; i < ai+len; i++ )
+			val = Math.min(avals[aix[i]], val);
+		return val;
+	}
+	
+	public static double vectMax(double[] a, int ai, int len) { 
+		double val = -Double.MAX_VALUE;
+		for( int i = ai; i < ai+len; i++ )
+			val = Math.max(a[ai], val);
+		return val; 
+	} 
+	
+	public static double vectMax(double[] avals, int[] aix, int ai, int len) {
+		double val = -Double.MAX_VALUE;
+		for( int i = ai; i < ai+len; i++ )
+			val = Math.max(avals[aix[i]], val);
+		return val;
+	}
+	
 	//custom vector div
 	
 	public static void vectDivAdd(double[] a, double bval, double[] c, int ai, int ci, int len) {
@@ -202,7 +230,7 @@ public class LibSpoofPrimitives
 	public static double[] vectNotequalWrite(double[] a, double bval, int ai, int len) {
 		double[] c = allocVector(len, false);
 		for( int j = 0; j < len; j++, ai++)
-			c[j] = (a[j] != bval) ? 1 : 0;
+			c[j] = (a[ai] != bval) ? 1 : 0;
 		return c;
 	}
 
@@ -228,7 +256,7 @@ public class LibSpoofPrimitives
 	public static double[] vectLessWrite(double[] a, double bval, int ai, int len) {
 		double[] c = allocVector(len, false);
 		for( int j = 0; j < len; j++, ai++)
-			c[j] = (a[j] < bval) ? 1 : 0;
+			c[j] = (a[ai] < bval) ? 1 : 0;
 		return c;
 	}
 
@@ -254,7 +282,7 @@ public class LibSpoofPrimitives
 	public static double[] vectLessequalWrite(double[] a, double bval, int ai, int len) {
 		double[] c = allocVector(len, false);
 		for( int j = 0; j < len; j++, ai++)
-			c[j] = (a[j] <= bval) ? 1 : 0;
+			c[j] = (a[ai] <= bval) ? 1 : 0;
 		return c;
 	}
 
@@ -280,7 +308,7 @@ public class LibSpoofPrimitives
 	public static double[] vectGreaterWrite(double[] a, double bval, int ai, int len) {
 		double[] c = allocVector(len, false);
 		for( int j = 0; j < len; j++, ai++)
-			c[j] = (a[j] > bval) ? 1 : 0;
+			c[j] = (a[ai] > bval) ? 1 : 0;
 		return c;
 	}
 
@@ -306,7 +334,7 @@ public class LibSpoofPrimitives
 	public static double[] vectGreaterequalWrite(double[] a, double bval, int ai, int len) {
 		double[] c = allocVector(len, false);
 		for( int j = 0; j < len; j++, ai++)
-			c[j] = (a[j] >= bval) ? 1 : 0;
+			c[j] = (a[ai] >= bval) ? 1 : 0;
 		return c;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
index 1a2ecfe..6e3549e 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
@@ -41,9 +41,7 @@ public class AlgorithmLinregCG extends AutomatedTestBase
 	private final static String TEST_CONF = "SystemML-config-codegen.xml";
 	private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
 	
-	//TODO Investigate numerical stability issues: on certain platforms this test, occasionally fails,
-	//for 1e-5 (specifically testLinregCGSparseRewritesCP); apparently due to the -(-(X)) -> X rewrite.
-	private final static double eps = 1e-1;
+	private final static double eps = 1e-5;
 	
 	private final static int rows = 2468;
 	private final static int cols = 507;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index b783104..c83bff3 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -45,6 +45,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME7 = TEST_NAME+"7"; //t(X)%*%(X%*%v-y); sum((X%*%v-y)^2);
 	private static final String TEST_NAME8 = TEST_NAME+"8"; //colSums((X/rowSums(X))>0.7)
 	private static final String TEST_NAME9 = TEST_NAME+"9"; //t(X) %*% (v - abs(y))
+	private static final String TEST_NAME10 = TEST_NAME+"10"; //Y=(X<=rowMins(X)); R=colSums((Y/rowSums(Y)));
 	
 	private static final String TEST_DIR = "functions/codegen/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + RowAggTmplTest.class.getSimpleName() + "/";
@@ -56,7 +57,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for(int i=1; i<=9; i++)
+		for(int i=1; i<=10; i++)
 			addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) );
 	}
 	
@@ -105,6 +106,11 @@ public class RowAggTmplTest extends AutomatedTestBase
 		testCodegenIntegration( TEST_NAME9, true, ExecType.CP );	
 	}
 	
+	@Test
+	public void testCodegenRowAggRewrite10() {
+		testCodegenIntegration( TEST_NAME10, true, ExecType.CP );	
+	}
+	
 	@Test	
 	public void testCodegenRowAgg1() {
 		testCodegenIntegration( TEST_NAME1, false, ExecType.CP );
@@ -150,6 +156,11 @@ public class RowAggTmplTest extends AutomatedTestBase
 		testCodegenIntegration( TEST_NAME9, false, ExecType.CP );	
 	}
 	
+	@Test
+	public void testCodegenRowAgg10() {
+		testCodegenIntegration( TEST_NAME10, false, ExecType.CP );	
+	}
+	
 	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
 	{	
 		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/test/scripts/functions/codegen/rowAggPattern10.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern10.R b/src/test/scripts/functions/codegen/rowAggPattern10.R
new file mode 100644
index 0000000..44e91ac
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern10.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+
+X = matrix(seq(1,1500), 150, 10, byrow=TRUE);
+
+Y = (X <= rowMins(X));
+Z = (Y / rowSums(Y));
+R = t(colSums(Z));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep="")); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/18ab98a6/src/test/scripts/functions/codegen/rowAggPattern10.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern10.dml b/src/test/scripts/functions/codegen/rowAggPattern10.dml
new file mode 100644
index 0000000..4572e68
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern10.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(seq(1,1500), rows=150, cols=10);
+
+Y = (X <= rowMins(X));
+Z = (Y / rowSums(Y));
+R = colSums(Z);
+
+write(R, $1)


[2/2] incubator-systemml git commit: [SYSTEMML-1439] Improved codegen row-aggregate candidate exploration

Posted by mb...@apache.org.
[SYSTEMML-1439] Improved codegen row-aggregate candidate exploration

This patch generalizes the existing candidate exploration algorithm to
enable merging of partial rowagg templates. Together with a couple of
minor fixes and cleanups, this now allows us to fuse the following
expression (from Kmeans) into a single operator.

Y = (X <= rowMins(X));
Z = (Y / rowSums(Y));
R = colSums(Z);

Note that the first row aggregate and row comparison consume the
original rows whereas the subsequent row aggregate and element-wise
division work over temporary row vectors (with internal reuse of
thread-local temporary row vectors).


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5de7beea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5de7beea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5de7beea

Branch: refs/heads/master
Commit: 5de7beea2f5d4b9d6c9a8f7f3ae152b7442cf923
Parents: 18ab98a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Apr 7 21:23:58 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Apr 7 21:23:58 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/SpoofCompiler.java       |  3 +-
 .../sysml/hops/codegen/cplan/CNodeUnary.java    |  7 +++-
 .../hops/codegen/template/TemplateUtils.java    |  5 +++
 .../sysml/hops/rewrite/HopRewriteUtils.java     | 43 +++++---------------
 .../functions/codegen/AlgorithmLinregCG.java    |  2 +-
 5 files changed, 24 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5de7beea/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 2e60732..3dfb452 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -439,7 +439,8 @@ public class SpoofCompiler
 			if( k != pos ) {
 				Hop input2 = hop.getInput().get(k);
 				if( memo.contains(input2.getHopID()) && !memo.get(input2.getHopID()).get(0).closed
-					&& memo.get(input2.getHopID()).get(0).type == TemplateType.CellTpl && tpl.merge(hop, input2) ) 
+					&& TemplateUtils.isType(memo.get(input2.getHopID()).get(0).type, tpl.getType(), TemplateType.CellTpl)
+					&& tpl.merge(hop, input2) ) 
 					P.crossProduct(k, -1L, input2.getHopID());
 				else
 					P.crossProduct(k, -1L);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5de7beea/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index 262295c..025033b 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -130,8 +130,9 @@ public class CNodeUnary extends CNode
 		sb.append(_inputs.get(0).codegen(sparse));
 		
 		//generate unary operation
+		boolean lsparse = sparse && (_inputs.get(0) instanceof CNodeData);
 		String var = createVarname();
-		String tmp = _type.getTemplate(sparse);
+		String tmp = _type.getTemplate(lsparse);
 		tmp = tmp.replaceAll("%TMP%", var);
 		
 		String varj = _inputs.get(0).getVarname();
@@ -142,7 +143,9 @@ public class CNodeUnary extends CNode
 		tmp = tmp.replaceAll("%IN1%", varj );
 		
 		//replace start position of main input
-		String spos = !varj.startsWith("b") ? varj+"i" : "0";
+		String spos = (!varj.startsWith("b") 
+			&& _inputs.get(0) instanceof CNodeData 
+			&& _inputs.get(0).getDataType().isMatrix()) ? varj+"i" : "0";
 		tmp = tmp.replaceAll("%POS1%", spos);
 		tmp = tmp.replaceAll("%POS2%", spos);
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5de7beea/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index c6a259f..e8d2086 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -25,6 +25,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
@@ -294,4 +295,8 @@ public class TemplateUtils
 		return ret + ((node instanceof CNodeBinary 
 			&& ((CNodeBinary)node).getType().isVectorScalarPrimitive()) ? 1 : 0);
 	}
+
+	public static boolean isType(TemplateType type, TemplateType... validTypes) {
+		return ArrayUtils.contains(validTypes, type);
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5de7beea/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index fcfc14b..a4b6ec1 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -22,6 +22,7 @@ package org.apache.sysml.hops.rewrite;
 import java.util.ArrayList;
 import java.util.HashMap;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.conf.ConfigurationManager;
@@ -1066,46 +1067,24 @@ public class HopRewriteUtils
 	//////////////////////////////////////
 	// utils for lookup tables
 	
-	public static boolean isValidOp( AggOp input, AggOp[] validTab )
-	{
-		for( AggOp valid : validTab )
-			if( valid == input )
-				return true;
-		return false;
+	public static boolean isValidOp( AggOp input, AggOp[] validTab ) {
+		return ArrayUtils.contains(validTab, input);
 	}
 	
-	public static boolean isValidOp( OpOp1 input, OpOp1[] validTab )
-	{
-		for( OpOp1 valid : validTab )
-			if( valid == input )
-				return true;
-		return false;
+	public static boolean isValidOp( OpOp1 input, OpOp1[] validTab ) {
+		return ArrayUtils.contains(validTab, input);
 	}
 	
-	public static boolean isValidOp( OpOp2 input, OpOp2[] validTab )
-	{
-		for( OpOp2 valid : validTab )
-			if( valid == input )
-				return true;
-		return false;
+	public static boolean isValidOp( OpOp2 input, OpOp2[] validTab ) {
+		return ArrayUtils.contains(validTab, input);
 	}
 	
-	public static boolean isValidOp( ReOrgOp input, ReOrgOp[] validTab )
-	{
-		for( ReOrgOp valid : validTab )
-			if( valid == input )
-				return true;
-		return false;
+	public static boolean isValidOp( ReOrgOp input, ReOrgOp[] validTab ) {
+		return ArrayUtils.contains(validTab, input);
 	}
 	
-	public static int getValidOpPos( OpOp2 input, OpOp2[] validTab )
-	{
-		for( int i=0; i<validTab.length; i++ ) {
-			 OpOp2 valid = validTab[i];
-			 if( valid == input )
-					return i;
-		}
-		return -1;
+	public static int getValidOpPos( OpOp2 input, OpOp2[] validTab ) {
+		return ArrayUtils.indexOf(validTab, input);
 	}
 	
 	/**

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5de7beea/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
index 6e3549e..dacc6ee 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
@@ -41,7 +41,7 @@ public class AlgorithmLinregCG extends AutomatedTestBase
 	private final static String TEST_CONF = "SystemML-config-codegen.xml";
 	private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
 	
-	private final static double eps = 1e-5;
+	private final static double eps = 1e-1;
 	
 	private final static int rows = 2468;
 	private final static int cols = 507;