You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/18 01:39:40 UTC

[1/2] systemml git commit: [SYSTEMML-2019] Fix missing codegen support for cbind-add operations

Repository: systemml
Updated Branches:
  refs/heads/master 5e96d2a6a -> db3d54c1c


[SYSTEMML-2019] Fix missing codegen support for cbind-add operations

This patch addresses recent issues of running the perftest suite with
codegen. Various algorithms failed for the intercept 2 configuration due
to missing cbind-add operations at the output of row templates. This
patch adds the necessary compiler and runtime support, dedicated tests,
and also fixes a related issue of incorrect results with sparse
cbind-write operations.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/93917287
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/93917287
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/93917287

Branch: refs/heads/master
Commit: 93917287a2853235c9e38ba228af25816a7e5a5e
Parents: 5e96d2a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Nov 17 15:40:56 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Nov 17 15:40:56 2017 -0800

----------------------------------------------------------------------
 .../apache/sysml/hops/codegen/SpoofFusedOp.java |  12 +-
 .../sysml/hops/codegen/cplan/CNodeBinary.java   |  12 +-
 .../sysml/hops/codegen/cplan/CNodeRow.java      |  21 ++--
 .../hops/codegen/template/TemplateRow.java      |   3 +-
 .../hops/codegen/template/TemplateUtils.java    |   2 +
 .../runtime/codegen/LibSpoofPrimitives.java     |  20 +++-
 .../sysml/runtime/codegen/SpoofRowwise.java     |  26 +++--
 .../runtime/matrix/data/LibMatrixMult.java      |  18 +++
 .../org/apache/sysml/utils/NativeHelper.java    |   2 +-
 .../functions/codegen/AlgorithmLinregCG.java    | 115 ++++++++++++++++---
 .../functions/codegen/Algorithm_LinregCG.R      |  10 +-
 11 files changed, 185 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
index c6176cc..bae51cb 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
@@ -47,7 +47,8 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
 		MULTI_SCALAR,
 		ROW_RANK_DIMS, // right wdivmm, row mm
 		COLUMN_RANK_DIMS,  // left wdivmm, row mm
-		COLUMN_RANK_DIMS_T;
+		COLUMN_RANK_DIMS_T,
+		VECT_CONST2;
 	}
 	
 	private Class<?> _class = null;
@@ -176,6 +177,9 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
 				case INPUT_DIMS_CONST2:
 					ret = new long[]{mc.getRows(), _constDim2, -1};
 					break;
+				case VECT_CONST2:
+					ret = new long[]{1, _constDim2, -1};
+					break;	
 				case SCALAR:
 					ret = new long[]{0, 0, -1};
 					break;
@@ -238,6 +242,10 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
 				setDim1(getInput().get(0).getDim1());
 				setDim2(_constDim2);
 				break;
+			case VECT_CONST2:
+				setDim1(1);
+				setDim2(_constDim2);
+				break;
 			case SCALAR:
 				setDim1(0);
 				setDim2(0);
@@ -260,7 +268,7 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
 				break;	
 			default:
 				throw new RuntimeException("Failed to refresh size information "
-						+ "for type: "+_dimsType.toString());
+					+ "for type: "+_dimsType.toString());
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index 8c3c73d..d771a84 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -35,6 +35,7 @@ public class CNodeBinary extends CNode
 		VECT_POW_ADD, VECT_MIN_ADD, VECT_MAX_ADD,
 		VECT_EQUAL_ADD, VECT_NOTEQUAL_ADD, VECT_LESS_ADD, 
 		VECT_LESSEQUAL_ADD, VECT_GREATER_ADD, VECT_GREATEREQUAL_ADD,
+		VECT_CBIND_ADD,
 		//vector-scalar operations
 		VECT_MULT_SCALAR, VECT_DIV_SCALAR, VECT_MINUS_SCALAR, VECT_PLUS_SCALAR,
 		VECT_POW_SCALAR, VECT_MIN_SCALAR, VECT_MAX_SCALAR,
@@ -94,7 +95,8 @@ public class CNodeBinary extends CNode
 				case VECT_LESS_ADD:
 				case VECT_LESSEQUAL_ADD:
 				case VECT_GREATER_ADD:
-				case VECT_GREATEREQUAL_ADD: {
+				case VECT_GREATEREQUAL_ADD:
+				case VECT_CBIND_ADD: {
 					String vectName = getVectorPrimitiveName();
 					if( scalarVector )
 						return sparseLhs ? "    LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2v%, %OUT%, %IN2i%, %POS2%, %POSOUT%, alen, %LEN%);\n" : 
@@ -129,11 +131,11 @@ public class CNodeBinary extends CNode
 				
 				case VECT_CBIND:
 					if( scalarInput )
-						return  "    double[] %TMP% = LibSpoofPrimitives.vectCBindWrite(%IN1%, %IN2%);\n";
+						return  "    double[] %TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1%, %IN2%);\n";
 					else
 						return sparseLhs ? 
-								"    double[] %TMP% = LibSpoofPrimitives.vectCBindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, %LEN%);\n" : 
-								"    double[] %TMP% = LibSpoofPrimitives.vectCBindWrite(%IN1%, %IN2%, %POS1%, %LEN%);\n";
+								"    double[] %TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, %LEN%);\n" : 
+								"    double[] %TMP% = LibSpoofPrimitives.vectCbindWrite(%IN1%, %IN2%, %POS1%, %LEN%);\n";
 				
 				//vector-vector operations
 				case VECT_MULT:
@@ -344,6 +346,7 @@ public class CNodeBinary extends CNode
 			case VECT_LESSEQUAL_ADD:       return "b(vltea)";
 			case VECT_GREATEREQUAL_ADD:    return "b(vgtea)";
 			case VECT_GREATER_ADD:         return "b(vgta)";
+			case VECT_CBIND_ADD:           return "b(vcbinda)";
 			case VECT_MULT_SCALAR:         return "b(vm)";
 			case VECT_DIV_SCALAR:          return "b(vd)";
 			case VECT_MINUS_SCALAR:        return "b(vmi)";
@@ -409,6 +412,7 @@ public class CNodeBinary extends CNode
 			case VECT_LESSEQUAL_ADD: 
 			case VECT_GREATER_ADD: 
 			case VECT_GREATEREQUAL_ADD:
+			case VECT_CBIND_ADD:
 				boolean vectorScalar = _inputs.get(1).getDataType()==DataType.SCALAR;
 				_rows = _inputs.get(vectorScalar ? 0 : 1)._rows;
 				_cols = _inputs.get(vectorScalar ? 0 : 1)._cols;

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
index 68739ad..41cf108 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
@@ -150,16 +150,17 @@ public class CNodeRow extends CNodeTpl
 	@Override
 	public SpoofOutputDimsType getOutputDimType() {
 		switch( _type ) {
-			case NO_AGG:       return SpoofOutputDimsType.INPUT_DIMS;
-			case NO_AGG_B1:    return SpoofOutputDimsType.ROW_RANK_DIMS;
-			case NO_AGG_CONST: return SpoofOutputDimsType.INPUT_DIMS_CONST2; 
-			case FULL_AGG:     return SpoofOutputDimsType.SCALAR;
-			case ROW_AGG:      return SpoofOutputDimsType.ROW_DIMS;
-			case COL_AGG:      return SpoofOutputDimsType.COLUMN_DIMS_COLS; //row vector
-			case COL_AGG_T:    return SpoofOutputDimsType.COLUMN_DIMS_ROWS; //column vector
-			case COL_AGG_B1:   return SpoofOutputDimsType.COLUMN_RANK_DIMS; 
-			case COL_AGG_B1_T: return SpoofOutputDimsType.COLUMN_RANK_DIMS_T;
-			case COL_AGG_B1R:  return SpoofOutputDimsType.RANK_DIMS_COLS;
+			case NO_AGG:        return SpoofOutputDimsType.INPUT_DIMS;
+			case NO_AGG_B1:     return SpoofOutputDimsType.ROW_RANK_DIMS;
+			case NO_AGG_CONST:  return SpoofOutputDimsType.INPUT_DIMS_CONST2; 
+			case FULL_AGG:      return SpoofOutputDimsType.SCALAR;
+			case ROW_AGG:       return SpoofOutputDimsType.ROW_DIMS;
+			case COL_AGG:       return SpoofOutputDimsType.COLUMN_DIMS_COLS; //row vector
+			case COL_AGG_T:     return SpoofOutputDimsType.COLUMN_DIMS_ROWS; //column vector
+			case COL_AGG_B1:    return SpoofOutputDimsType.COLUMN_RANK_DIMS; 
+			case COL_AGG_B1_T:  return SpoofOutputDimsType.COLUMN_RANK_DIMS_T;
+			case COL_AGG_B1R:   return SpoofOutputDimsType.RANK_DIMS_COLS;
+			case COL_AGG_CONST: return SpoofOutputDimsType.VECT_CONST2;
 			default:
 				throw new RuntimeException("Unsupported row type: "+_type.toString());
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index d91c36e..fcc3e93 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -219,7 +219,8 @@ public class TemplateRow extends TemplateBase
 		CNodeRow tpl = new CNodeRow(inputs, output);
 		tpl.setRowType(TemplateUtils.getRowType(hop, 
 			inHops2.get("X"), inHops2.get("B1")));
-		if( tpl.getRowType()==RowType.NO_AGG_CONST )
+		if( tpl.getRowType()==RowType.NO_AGG_CONST 
+			|| tpl.getRowType()==RowType.COL_AGG_CONST )
 			tpl.setConstDim2(hop.getDim2());
 		tpl.setNumVectorIntermediates(TemplateUtils
 			.determineMinVectorIntermediates(output));

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index e07c410..8ac2d54 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -209,6 +209,8 @@ public class TemplateUtils
 			return RowType.COL_AGG_B1R;
 		else if( X.getDim1() == output.getDim1() && X.getDim2() != output.getDim2() )
 			return RowType.NO_AGG_CONST;
+		else if( output.getDim1() == 1 && X.getDim2() != output.getDim2() )
+			return RowType.COL_AGG_CONST;
 		else
 			throw new RuntimeException("Unknown row type for hop "+output.getHopID()+".");
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 1d56f1c..5bef83b 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -214,22 +214,34 @@ public class LibSpoofPrimitives
 	
 	// cbind handling
 	
-	public static double[] vectCBindWrite(double a, double b) {
+	public static double[] vectCbindAdd(double[] a, double b, double[] c, int ai, int ci, int len) {
+		LibMatrixMult.vectAdd(a, c, ai, ci, len);
+		c[ci+len] += b;
+		return c;
+	}
+	
+	public static double[] vectCbindAdd(double[] a, double b, double[] c, int[] aix, int ai, int ci, int alen, int len) {
+		LibMatrixMult.vectAdd(a, c, aix, ai, ci, alen);
+		c[ci+len] += b;
+		return c;
+	}
+	
+	public static double[] vectCbindWrite(double a, double b) {
 		double[] c = allocVector(2, false);
 		c[0] = a;
 		c[1] = b;
 		return c;
 	}
 	
-	public static double[] vectCBindWrite(double[] a, double b, int aix, int len) {
+	public static double[] vectCbindWrite(double[] a, double b, int aix, int len) {
 		double[] c = allocVector(len+1, false);
 		System.arraycopy(a, aix, c, 0, len);
 		c[len] = b;
 		return c;
 	}
 	
-	public static double[] vectCBindWrite(double[] a, double b, int[] aix, int ai, int alen, int len) {
-		double[] c = allocVector(len+1, false);
+	public static double[] vectCbindWrite(double[] a, double b, int[] aix, int ai, int alen, int len) {
+		double[] c = allocVector(len+1, true);
 		for( int j = ai; j < ai+alen; j++ )
 			c[aix[j]] = a[j];
 		c[len] = b;

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index b0afd88..889ee1f 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -55,12 +55,13 @@ public abstract class SpoofRowwise extends SpoofOperator
 		COL_AGG_T,    //transposed col aggregation (e.g., t(X) %*% y)
 		COL_AGG_B1,   //col aggregation w/ matrix mult B1
 		COL_AGG_B1_T, //transposed col aggregation w/ matrix mult B1
-		COL_AGG_B1R;  //col aggregation w/ matrix mult B1 to row vector
+		COL_AGG_B1R,  //col aggregation w/ matrix mult B1 to row vector
+		COL_AGG_CONST;//col aggregation w/ expansion/contraction
 		
 		public boolean isColumnAgg() {
 			return this == COL_AGG || this == COL_AGG_T
 				|| this == COL_AGG_B1 || this == COL_AGG_B1_T
-				|| this == COL_AGG_B1R;
+				|| this == COL_AGG_B1R || this == COL_AGG_CONST;
 		}
 		public boolean isRowTypeB1() {
 			return this == NO_AGG_B1 || this == COL_AGG_B1 
@@ -260,16 +261,17 @@ public abstract class SpoofRowwise extends SpoofOperator
 	
 	private void allocateOutputMatrix(int m, int n, int n2, MatrixBlock out) {
 		switch( _type ) {
-			case NO_AGG:       out.reset(m, n, false); break;
-			case NO_AGG_B1:    out.reset(m, n2, false); break;
-			case NO_AGG_CONST: out.reset(m, (int)_constDim2, false); break;
-			case FULL_AGG:     out.reset(1, 1, false); break;
-			case ROW_AGG:      out.reset(m, 1, false); break;
-			case COL_AGG:      out.reset(1, n, false); break;
-			case COL_AGG_T:    out.reset(n, 1, false); break;
-			case COL_AGG_B1:   out.reset(n2, n, false); break;
-			case COL_AGG_B1_T: out.reset(n, n2, false); break;
-			case COL_AGG_B1R:  out.reset(1, n2, false); break;
+			case NO_AGG:        out.reset(m, n, false); break;
+			case NO_AGG_B1:     out.reset(m, n2, false); break;
+			case NO_AGG_CONST:  out.reset(m, (int)_constDim2, false); break;
+			case FULL_AGG:      out.reset(1, 1, false); break;
+			case ROW_AGG:       out.reset(m, 1, false); break;
+			case COL_AGG:       out.reset(1, n, false); break;
+			case COL_AGG_T:     out.reset(n, 1, false); break;
+			case COL_AGG_B1:    out.reset(n2, n, false); break;
+			case COL_AGG_B1_T:  out.reset(n, n2, false); break;
+			case COL_AGG_B1R:   out.reset(1, n2, false); break;
+			case COL_AGG_CONST: out.reset(1, (int)_constDim2, false); break;
 		}
 		out.allocateDenseBlock();
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index a1f648e..9421bdb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -3288,6 +3288,24 @@ public class LibMatrixMult
 		}
 	}
 
+	public static void vectAdd( double[] a, double[] c, int[] aix, int ai, int ci, final int alen ) {
+		final int bn = alen%8;
+		//rest, not aligned to 8-blocks
+		for( int j = ai; j < ai+bn; j++ )
+			c[ ci+aix[j] ] += a[ j ];
+		//unrolled 8-block  (for better instruction-level parallelism)
+		for( int j = ai+bn; j < ai+alen; j+=8 ) {
+			c[ ci+aix[j+0] ] += a[ j+0 ];
+			c[ ci+aix[j+1] ] += a[ j+1 ];
+			c[ ci+aix[j+2] ] += a[ j+2 ];
+			c[ ci+aix[j+3] ] += a[ j+3 ];
+			c[ ci+aix[j+4] ] += a[ j+4 ];
+			c[ ci+aix[j+5] ] += a[ j+5 ];
+			c[ ci+aix[j+6] ] += a[ j+6 ];
+			c[ ci+aix[j+7] ] += a[ j+7 ];
+		}
+	}
+	
 	private static void vectAdd4( double[] a1, double[] a2, double[] a3, double[] a4, double[] c, int ai, int ci, final int len )
 	{
 		final int bn = len%8;

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/main/java/org/apache/sysml/utils/NativeHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/NativeHelper.java b/src/main/java/org/apache/sysml/utils/NativeHelper.java
index db8e74b..6f22970 100644
--- a/src/main/java/org/apache/sysml/utils/NativeHelper.java
+++ b/src/main/java/org/apache/sysml/utils/NativeHelper.java
@@ -48,7 +48,7 @@ public class NativeHelper {
 		SUCCESSFULLY_LOADED_NATIVE_BLAS_AND_IN_USE,
 		SUCCESSFULLY_LOADED_NATIVE_BLAS_AND_NOT_IN_USE,
 		ATTEMPTED_LOADING_NATIVE_BLAS_UNSUCCESSFULLY
-	};
+	}
 	
 	public static NativeBlasState CURRENT_NATIVE_BLAS_STATE = NativeBlasState.NOT_ATTEMPTED_LOADING_NATIVE_BLAS;
 	private static String blasType;

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
index 80e4b9f..149b28e 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
@@ -49,7 +49,6 @@ public class AlgorithmLinregCG extends AutomatedTestBase
 	private final static double sparsity1 = 0.7; //dense
 	private final static double sparsity2 = 0.1; //sparse
 	
-	private final static int intercept = 0;
 	private final static double epsilon = 0.000000001;
 	private final static double maxiter = 10;
 	
@@ -60,46 +59,126 @@ public class AlgorithmLinregCG extends AutomatedTestBase
 	}
 
 	@Test
-	public void testLinregCGDenseRewritesCP() {
-		runLinregCGTest(TEST_NAME1, true, false, ExecType.CP);
+	public void testLinregCG0DenseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, false, 0, ExecType.CP);
 	}
 	
 	@Test
-	public void testLinregCGSparseRewritesCP() {
-		runLinregCGTest(TEST_NAME1, true, true, ExecType.CP);
+	public void testLinregCG0SparseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, true, 0, ExecType.CP);
 	}
 	
 	@Test
-	public void testLinregCGDenseCP() {
-		runLinregCGTest(TEST_NAME1, false, false, ExecType.CP);
+	public void testLinregCG0DenseCP() {
+		runLinregCGTest(TEST_NAME1, false, false, 0, ExecType.CP);
 	}
 	
 	@Test
-	public void testLinregCGSparseCP() {
-		runLinregCGTest(TEST_NAME1, false, true, ExecType.CP);
+	public void testLinregCG0SparseCP() {
+		runLinregCGTest(TEST_NAME1, false, true, 0, ExecType.CP);
 	}
 
 	@Test
-	public void testLinregCGDenseRewritesSP() {
-		runLinregCGTest(TEST_NAME1, true, false, ExecType.SPARK);
+	public void testLinregCG0DenseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, false, 0, ExecType.SPARK);
 	}
 	
 	@Test
-	public void testLinregCGSparseRewritesSP() {
-		runLinregCGTest(TEST_NAME1, true, true, ExecType.SPARK);
+	public void testLinregCG0SparseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, true, 0, ExecType.SPARK);
 	}
 	
 	@Test
-	public void testLinregCGDenseSP() {
-		runLinregCGTest(TEST_NAME1, false, false, ExecType.SPARK);
+	public void testLinregCG0DenseSP() {
+		runLinregCGTest(TEST_NAME1, false, false, 0, ExecType.SPARK);
 	}
 	
 	@Test
-	public void testLinregCGSparseSP() {
-		runLinregCGTest(TEST_NAME1, false, true, ExecType.SPARK);
+	public void testLinregCG0SparseSP() {
+		runLinregCGTest(TEST_NAME1, false, true, 0, ExecType.SPARK);
 	}
 	
-	private void runLinregCGTest( String testname, boolean rewrites, boolean sparse, ExecType instType)
+	@Test
+	public void testLinregCG1DenseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, false, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG1SparseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, true, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG1DenseCP() {
+		runLinregCGTest(TEST_NAME1, false, false, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG1SparseCP() {
+		runLinregCGTest(TEST_NAME1, false, true, 1, ExecType.CP);
+	}
+
+	@Test
+	public void testLinregCG1DenseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, false, 1, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG1SparseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, true, 1, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG1DenseSP() {
+		runLinregCGTest(TEST_NAME1, false, false, 1, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG1SparseSP() {
+		runLinregCGTest(TEST_NAME1, false, true, 1, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG2DenseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, false, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG2SparseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, true, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG2DenseCP() {
+		runLinregCGTest(TEST_NAME1, false, false, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCG2SparseCP() {
+		runLinregCGTest(TEST_NAME1, false, true, 2, ExecType.CP);
+	}
+
+	@Test
+	public void testLinregCG2DenseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, false, 2, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG2SparseRewritesSP() {
+		runLinregCGTest(TEST_NAME1, true, true, 2, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG2DenseSP() {
+		runLinregCGTest(TEST_NAME1, false, false, 2, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCG2SparseSP() {
+		runLinregCGTest(TEST_NAME1, false, true, 2, ExecType.SPARK);
+	}
+	
+	private void runLinregCGTest( String testname, boolean rewrites, boolean sparse, int intercept, ExecType instType)
 	{
 		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
 		RUNTIME_PLATFORM platformOld = rtplatform;

http://git-wip-us.apache.org/repos/asf/systemml/blob/93917287/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/Algorithm_LinregCG.R b/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
index 600d42e..83d1f1f 100644
--- a/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
+++ b/src/test/scripts/functions/codegen/Algorithm_LinregCG.R
@@ -51,13 +51,15 @@ if (intercept_status == 1 | intercept_status == 2)
 }
 
 if (intercept_status == 2) {
-    avg_X_cols = t(colSums(X)) / n;
-    var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
+    avg_X_cols = colSums(X) / n;
+    var_X_cols = (colSums (X ^ 2) - n * (avg_X_cols ^ 2)) / (n - 1);
     is_unsafe = (var_X_cols <= 0);
     scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-    scale_X [m_ext, 1] = 1;
+    scale_X [m_ext] = 1;
     shift_X = - avg_X_cols * scale_X;
-    shift_X [m_ext, 1] = 0;
+    shift_X [m_ext] = 0;
+    scale_X = as.matrix(scale_X);
+    shift_X = as.matrix(shift_X);
 } else {
     scale_X = matrix (1, m_ext, 1);
     shift_X = matrix (0, m_ext, 1);


[2/2] systemml git commit: [SYSTEMML-2020] Avoid sparse-dense conversion in codegen outer tpls

Posted by mb...@apache.org.
[SYSTEMML-2020] Avoid sparse-dense conversion in codegen outer tpls

This patch generalizes the codegen outer product template to consume -
similar to the cell and row templates - generic SideInputs instead of
dense matrices as side input, which avoids unnecessary conversions from
sparse to dense matrices and thus improves performance.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/db3d54c1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/db3d54c1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/db3d54c1

Branch: refs/heads/master
Commit: db3d54c1ce6651f2abf43d3ba919527b943aa8ba
Parents: 9391728
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Nov 17 16:49:45 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Nov 17 16:49:45 2017 -0800

----------------------------------------------------------------------
 .../hops/codegen/cplan/CNodeOuterProduct.java   |  5 +-
 .../runtime/codegen/SpoofOuterProduct.java      | 66 +++++++++++---------
 2 files changed, 39 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/db3d54c1/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
index e518246..e2967a5 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
@@ -31,6 +31,7 @@ public class CNodeOuterProduct extends CNodeTpl
 	private static final String TEMPLATE = 
 			  "package codegen;\n"
 			+ "import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;\n"
+			+ "import org.apache.sysml.runtime.codegen.SpoofOperator.SideInput;\n"
 			+ "import org.apache.sysml.runtime.codegen.SpoofOuterProduct;\n"
 			+ "import org.apache.sysml.runtime.codegen.SpoofOuterProduct.OutProdType;\n"
 			+ "import org.apache.commons.math3.util.FastMath;\n"
@@ -39,10 +40,10 @@ public class CNodeOuterProduct extends CNodeTpl
 			+ "  public %TMP%() {\n"
 			+ "    super(OutProdType.%TYPE%);\n"
 			+ "  }\n"
-			+ "  protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rix, int cix) { \n"
+			+ "  protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, SideInput[] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rix, int cix) { \n"
 			+ "%BODY_dense%"
 			+ "  }\n"
-			+ "  protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int len, int rix, int cix) { \n"
+			+ "  protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, SideInput[] b, double[] scalars, int m, int n, int len, int rix, int cix) { \n"
 			+ "%BODY_cellwise%"
 			+ "    return %OUT_cellwise%;\n"
 			+ "  }\n"			

http://git-wip-us.apache.org/repos/asf/systemml/blob/db3d54c1/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index 26a661a..4d4da51 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -82,7 +82,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		
 		//input preparation
 		double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
-		double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+		SideInput[] b = prepInputMatrices(inputs, 3, false);
 		double[] scalars = prepInputScalars(scalarObjects);
 		
 		//core sequential execute
@@ -118,7 +118,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		
 		//input preparation
 		double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
-		double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+		SideInput[] b = prepInputMatrices(inputs, 3, false);
 		double[] scalars = prepInputScalars(scalarObjects);
 		
 		//core sequential execute
@@ -186,9 +186,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		
 		//input preparation
 		double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
-		double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+		SideInput[] b = prepInputMatrices(inputs, 3, false);
 		double[] scalars = prepInputScalars(scalarObjects);
-				
+		
 		//core sequential execute
 		final int m = inputs.get(0).getNumRows();
 		final int n = inputs.get(0).getNumColumns();
@@ -268,7 +268,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		
 		//input preparation
 		double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
-		double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+		SideInput[] b = prepInputMatrices(inputs, 3, false);
 		double[] scalars = prepInputScalars(scalarObjects);
 		
 		//core sequential execute
@@ -338,13 +338,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		return UtilFunctions.roundToNext(base, k);
 	}
 	
-	private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars,
+	private void executeDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars,
 		double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
 	{
 		//approach: iterate over non-zeros of w, selective mm computation
 		//cache-conscious blocking: due to blocksize constraint (default 1000),
 		//a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB) 
 		
+		SideInput[] lb = createSparseSideInputs(b);
 		final int blocksizeIJ = 16; //u/v block (max at typical L2 size) 
 		int cix = 0;
 		//blocked execution
@@ -358,18 +359,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 					for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k)
 						if( a[ix+j] != 0 ) {
 							cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
-							genexecDense( a[ix+j], u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j); 
+							genexecDense( a[ix+j], u, uix, v, vix, lb, scalars, c, cix, m, n, k, i, j); 
 						}
 			}
 	}
 	
-	private void executeCellwiseDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars,
+	private void executeCellwiseDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars,
 		double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
 	{
 		//approach: iterate over non-zeros of w, selective mm computation
 		//cache-conscious blocking: due to blocksize constraint (default 1000),
 		//a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB)
 		
+		SideInput[] lb = createSparseSideInputs(b);
 		final int blocksizeIJ = 16; //u/v block (max at typical L2 size)
 		//blocked execution
 		double sum = 0;
@@ -383,18 +385,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 					for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k)
 						if( a[ix+j] != 0 ) {
 							if(type == OutProdType.CELLWISE_OUTER_PRODUCT)
-								c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j );
+								c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, lb, scalars, m, n, k, i, j );
 							else
-								sum += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);
+								sum += genexecCellwise( a[ix+j], u, uix, v, vix, lb, scalars, m, n, k, i, j);
 						}
 			}
 		if( type != OutProdType.CELLWISE_OUTER_PRODUCT )
 			c[0] = sum;
 	}
 	
-	private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars,
+	private void executeSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars,
 		double[] c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) 
 	{
+		SideInput[] lb = createSparseSideInputs(b);
 		boolean left = (_outerProductType== OutProdType.LEFT_OUTER_PRODUCT);
 		
 		//approach: iterate over non-zeros of w, selective mm computation
@@ -420,7 +423,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 				int index = (cl==0||sblock.isEmpty(i)) ? 0 : sblock.posFIndexGTE(i,cl);
 				index = wpos + ((index>=0) ? index : n);
 				for( ; index<wpos+wlen && wix[index]<cu; index++ ) {
-					genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c,
+					genexecDense(wval[index], u, uix, v, wix[index]*k, lb, scalars, c,
 						(left ? wix[index]*k : uix), m, n, k, i, wix[index]);
 				}
 			}
@@ -454,7 +457,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 						
 						int index = wpos + curk[i-bi];
 						for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) {
-							genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c,
+							genexecDense(wval[index], u, uix, v, wix[index]*k, lb, scalars, c,
 								(left ? wix[index]*k : uix), m, n, k, i, wix[index]);
 						}
 						curk[i-bi] = index - wpos;
@@ -464,9 +467,10 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		}
 	}
 	
-	private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars, 
+	private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars, 
 		MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu ) 
 	{
+		SideInput[] lb = createSparseSideInputs(b);
 		final int blocksizeIJ = (int) (8L*m*n/nnz);
 		int[] curk = new int[Math.min(blocksizeIJ, ru-rl)];
 		
@@ -491,11 +495,11 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 						if( type == OutProdType.CELLWISE_OUTER_PRODUCT )
 							for( ; index<wpos+wlen && wix[index]<bjmin; index++ )
 								c[wix[index]] = genexecCellwise( wval[index], 
-									u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] );
+									u, uix, v, wix[index]*k, lb, scalars, m, n, k, i, wix[index] );
 						else
 							for( ; index<wpos+wlen && wix[index]<bjmin; index++ )
 								tmp += genexecCellwise( wval[index], 
-									u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]);
+									u, uix, v, wix[index]*k, lb, scalars, m, n, k, i, wix[index]);
 						curk[i-bi] = index - wpos;
 					}
 				}
@@ -522,7 +526,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 						int index = wpos + curk[i-bi];
 						for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) {
 							c.append(i, wix[index], genexecCellwise( wval[index], u, uix, v,
-								wix[index]*k, b, scalars, m, n, k, i, wix[index] ));
+								wix[index]*k, lb, scalars, m, n, k, i, wix[index] ));
 						}
 						curk[i-bi] = index - wpos;
 					}
@@ -531,9 +535,10 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		}
 	}
 	
-	private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, 
+	private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, 
 			double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) 
 	{
+		SideInput[] lb = createSparseSideInputs(b);
 		boolean left = (_outerProductType==OutProdType.LEFT_OUTER_PRODUCT);
 		
 		Iterator<IJV> iter = !left ? a.getIterator(rl, ru, false) :
@@ -542,14 +547,15 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 			IJV cell = iter.next();
 			int uix = cell.getI() * k;
 			int vix = cell.getJ() * k;
-			genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c,
+			genexecDense(cell.getV(), u, uix, v, vix, lb, scalars, c,
 				left ? vix : uix, m, n, k, cell.getI(), cell.getJ());
 		}
 	}
 	
-	private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars,
+	private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars,
 		MatrixBlock out, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
-	{			
+	{
+		SideInput[] lb = createSparseSideInputs(b);
 		double[] c = out.getDenseBlock();
 		SparseBlock csblock = out.getSparseBlock();
 		
@@ -562,23 +568,23 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 				if( out.isInSparseFormat() ) {
 					csblock.allocate(cell.getI());
 					csblock.append(cell.getI(), cell.getJ(),
-						genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
+						genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ()));
 				}
 				else {
 					c[cell.getI()*n+cell.getJ()] =
-						genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+						genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ());
 				}
 			}
 			else {
-				c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+				c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ());
 			}
 		}
 	}
 
-	protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, double[][] b,
+	protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, SideInput[] b,
 		double[] scalars, double[] c, int ci, int m, int n, int k, int rowIndex, int colIndex);
 	
-	protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, double[][] b,
+	protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, SideInput[] b,
 		double[] scalars, int m, int n, int k, int rowIndex, int colIndex);
 	
 	private class ParExecTask implements Callable<Long> 
@@ -586,7 +592,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		private final MatrixBlock _a;
 		private final double[] _u;
 		private final double[] _v;
-		private final double[][] _b;
+		private final SideInput[] _b;
 		private final double[] _scalars;
 		private final MatrixBlock _c;
 		private final int _clen;
@@ -598,7 +604,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		private final int _cl;
 		private final int _cu;
 		
-		protected ParExecTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
+		protected ParExecTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
 			_a = a;
 			_u = u;
 			_v = v;
@@ -653,7 +659,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		private final MatrixBlock _a;
 		private final double[] _u;
 		private final double[] _v;
-		private final double[][] _b;
+		private final SideInput[] _b;
 		private final double[] _scalars;
 		private final int _rlen;
 		private final int _clen;
@@ -664,7 +670,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
 		private final int _cl;
 		private final int _cu;
 		
-		protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
+		protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
 			_a = a;
 			_u = u;
 			_v = v;