You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/06/30 06:13:28 UTC
[2/2] systemml git commit: [SYSTEMML-1535] Codegen matrix-matrix
multiplication support
[SYSTEMML-1535] Codegen matrix-matrix multiplication support
This patch generalizes the row-wise code generation template from
matrix-vector to matrix-matrix multiplications, which enables a broad
range of additional fusion opportunities. Examples are Mlogreg and
KMeans with multiple classes or centroids, respectively. The fusion of
matrix-matrix multiplications avoids unnecessary scans of X as well as
large intermediates of size nrow(X) x K.
On a scenario of KMeans with 1 run, 20 iterations, a 100M x 10 dense input,
and 5 centroids, this change reduced the end-to-end runtime from
852s (1360s without codegen) to 463s. The major additional benefits come
from fusing (1) -2 * (X %*% t(C)) + t(rowSums(C ^ 2)), and (2) (t(P)
%*% X), which avoids the two large intermediates X %*% t(C) and t(P).
Furthermore, this patch also lays the foundations for more complex DAGs
with different vector sizes in row-wise templates.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6b25b3bf
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6b25b3bf
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6b25b3bf
Branch: refs/heads/master
Commit: 6b25b3bf2621f13d97c6a3bf3a66a333af834db7
Parents: 6a4aa1d
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Jun 29 22:38:03 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Jun 29 23:14:05 2017 -0700
----------------------------------------------------------------------
.../sysml/hops/codegen/SpoofCompiler.java | 2 +-
.../apache/sysml/hops/codegen/SpoofFusedOp.java | 15 ++-
.../apache/sysml/hops/codegen/cplan/CNode.java | 10 ++
.../sysml/hops/codegen/cplan/CNodeBinary.java | 79 +++++++++---
.../hops/codegen/cplan/CNodeOuterProduct.java | 6 +-
.../sysml/hops/codegen/cplan/CNodeRow.java | 58 +++++----
.../sysml/hops/codegen/cplan/CNodeUnary.java | 4 +
.../hops/codegen/template/TemplateRow.java | 84 +++++++++---
.../hops/codegen/template/TemplateUtils.java | 127 +++++++------------
.../runtime/codegen/LibSpoofPrimitives.java | 83 ++++++++++--
.../sysml/runtime/codegen/SpoofCellwise.java | 4 +-
.../runtime/codegen/SpoofMultiAggregate.java | 2 +-
.../sysml/runtime/codegen/SpoofOperator.java | 105 +++++++--------
.../runtime/codegen/SpoofOuterProduct.java | 16 +--
.../sysml/runtime/codegen/SpoofRowwise.java | 77 ++++++-----
.../instructions/spark/SpoofSPInstruction.java | 3 +-
.../spark/data/PartitionedBroadcast.java | 8 ++
.../runtime/matrix/data/LibMatrixMult.java | 33 ++---
.../functions/codegen/RowAggTmplTest.java | 66 +++++++++-
.../scripts/functions/codegen/rowAggPattern24.R | 33 +++++
.../functions/codegen/rowAggPattern24.dml | 30 +++++
.../scripts/functions/codegen/rowAggPattern25.R | 32 +++++
.../functions/codegen/rowAggPattern25.dml | 29 +++++
.../scripts/functions/codegen/rowAggPattern26.R | 32 +++++
.../functions/codegen/rowAggPattern26.dml | 28 ++++
.../scripts/functions/codegen/rowAggPattern27.R | 32 +++++
.../functions/codegen/rowAggPattern27.dml | 29 +++++
27 files changed, 747 insertions(+), 280 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index fede282..5342c09 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -96,7 +96,7 @@ import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.utils.Explain;
import org.apache.sysml.utils.Statistics;
-public class SpoofCompiler
+public class SpoofCompiler
{
private static final Log LOG = LogFactory.getLog(SpoofCompiler.class.getName());
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
index 06be99b..0d4b8db 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
@@ -44,8 +44,9 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
COLUMN_DIMS_COLS,
SCALAR,
MULTI_SCALAR,
- ROW_RANK_DIMS, // right wdivmm
- COLUMN_RANK_DIMS // left wdivmm
+ ROW_RANK_DIMS, // right wdivmm, row mm
+ COLUMN_RANK_DIMS, // left wdivmm, row mm
+ COLUMN_RANK_DIMS_T;
}
private Class<?> _class = null;
@@ -182,6 +183,12 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
ret = new long[]{mc.getCols(), mc2.getCols(), -1};
break;
}
+ case COLUMN_RANK_DIMS_T: {
+ MatrixCharacteristics mc2 = memo.getAllInputStats(getInput().get(1));
+ if( mc2.dimsKnown() )
+ ret = new long[]{mc2.getCols(), mc.getCols(), -1};
+ break;
+ }
default:
throw new RuntimeException("Failed to infer worst-case size information "
+ "for type: "+_dimsType.toString());
@@ -231,6 +238,10 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
setDim1(getInput().get(0).getDim2());
setDim2(getInput().get(1).getDim2());
break;
+ case COLUMN_RANK_DIMS_T:
+ setDim1(getInput().get(1).getDim2());
+ setDim2(getInput().get(0).getDim2());
+ break;
default:
throw new RuntimeException("Failed to refresh size information "
+ "for type: "+_dimsType.toString());
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/cplan/CNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNode.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNode.java
index efe468e..1f91697 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNode.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNode.java
@@ -83,6 +83,16 @@ public abstract class CNode
return _genVar;
}
+ public String getVectorLength() {
+ if( getVarname().startsWith("a") )
+ return "len";
+ else if( getVarname().startsWith("b") )
+ return getVarname()+".clen";
+ else if( _dataType==DataType.MATRIX )
+ return getVarname()+".length";
+ return "";
+ }
+
public String getClassname() {
return getVarname();
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index 8d67f26..4bbf205 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -28,7 +28,8 @@ import org.apache.sysml.runtime.util.UtilFunctions;
public class CNodeBinary extends CNode
{
public enum BinType {
- DOT_PRODUCT,
+ //matrix multiplication operations
+ DOT_PRODUCT, VECT_MATRIXMULT, VECT_OUTERMULT_ADD,
//vector-scalar-add operations
VECT_MULT_ADD, VECT_DIV_ADD, VECT_MINUS_ADD, VECT_PLUS_ADD,
VECT_POW_ADD, VECT_MIN_ADD, VECT_MAX_ADD,
@@ -71,6 +72,12 @@ public class CNodeBinary extends CNode
case DOT_PRODUCT:
return sparse ? " double %TMP% = LibSpoofPrimitives.dotProduct(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, alen);\n" :
" double %TMP% = LibSpoofPrimitives.dotProduct(%IN1%, %IN2%, %POS1%, %POS2%, %LEN%);\n";
+ case VECT_MATRIXMULT:
+ return sparse ? " double[] %TMP% = LibSpoofPrimitives.vectMatrixMult(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, alen, len);\n" :
+ " double[] %TMP% = LibSpoofPrimitives.vectMatrixMult(%IN1%, %IN2%, %POS1%, %POS2%, %LEN%);\n";
+ case VECT_OUTERMULT_ADD:
+ return sparse ? " LibSpoofPrimitives.vectOuterMultAdd(%IN1v%, %IN2%, %OUT%, %IN1i%, %POS1%, %POS2%, %POSOUT%, alen, %LEN1%, %LEN2%);\n" :
+ " LibSpoofPrimitives.vectOuterMultAdd(%IN1%, %IN2%, %OUT%, %POS1%, %POS2%, %POSOUT%, %LEN1%, %LEN2%);\n";
//vector-scalar-add operations
case VECT_MULT_ADD:
@@ -88,10 +95,10 @@ public class CNodeBinary extends CNode
case VECT_GREATEREQUAL_ADD: {
String vectName = getVectorPrimitiveName();
if( scalarVector )
- return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2v%, %OUT%, %IN2i%, %POS2%, %POSOUT%, alen, len);\n" :
+ return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2v%, %OUT%, %IN2i%, %POS2%, %POSOUT%, alen, %LEN%);\n" :
" LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2%, %OUT%, %POS2%, %POSOUT%, %LEN%);\n";
else
- return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1v%, %IN2%, %OUT%, %IN1i%, %POS1%, %POSOUT%, alen, len);\n" :
+ return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1v%, %IN2%, %OUT%, %IN1i%, %POS1%, %POSOUT%, alen, %LEN%);\n" :
" LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2%, %OUT%, %POS1%, %POSOUT%, %LEN%);\n";
}
@@ -111,10 +118,10 @@ public class CNodeBinary extends CNode
case VECT_GREATEREQUAL_SCALAR: {
String vectName = getVectorPrimitiveName();
if( scalarVector )
- return sparse ? " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %IN2i%, %POS2%, alen, len);\n" :
+ return sparse ? " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %IN2i%, %POS2%, alen, %LEN%);\n" :
" double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS2%, %LEN%);\n";
else
- return sparse ? " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, len);\n" :
+ return sparse ? " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, %LEN%);\n" :
" double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
}
@@ -133,7 +140,7 @@ public class CNodeBinary extends CNode
case VECT_GREATEREQUAL: {
String vectName = getVectorPrimitiveName();
return sparse ?
- " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, alen, len);\n" :
+ " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, alen, %LEN%);\n" :
" double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %POS2%, %LEN%);\n";
}
@@ -185,7 +192,8 @@ public class CNodeBinary extends CNode
}
public boolean isVectorPrimitive() {
return isVectorScalarPrimitive()
- || isVectorVectorPrimitive();
+ || isVectorVectorPrimitive()
+ || isVectorMatrixPrimitive();
}
public boolean isVectorScalarPrimitive() {
return this == VECT_DIV_SCALAR || this == VECT_MULT_SCALAR
@@ -204,6 +212,10 @@ public class CNodeBinary extends CNode
|| this == VECT_LESS || this == VECT_LESSEQUAL
|| this == VECT_GREATER || this == VECT_GREATEREQUAL;
}
+ public boolean isVectorMatrixPrimitive() {
+ return this == VECT_MATRIXMULT
+ || this == VECT_OUTERMULT_ADD;
+ }
public BinType getVectorAddPrimitive() {
return BinType.valueOf("VECT_"+getVectorPrimitiveName().toUpperCase()+"_ADD");
}
@@ -257,19 +269,32 @@ public class CNodeBinary extends CNode
tmp = tmp.replace("%TMP%", var);
//replace input references and start indexes
- for( int j=1; j<=2; j++ ) {
- String varj = _inputs.get(j-1).getVarname();
+ for( int j=0; j<2; j++ ) {
+ String varj = _inputs.get(j).getVarname();
//replace sparse and dense inputs
- tmp = tmp.replace("%IN"+j+"v%", varj+"vals");
- tmp = tmp.replace("%IN"+j+"i%", varj+"ix");
- tmp = tmp.replace("%IN"+j+"%", varj );
+ tmp = tmp.replace("%IN"+(j+1)+"v%", varj+"vals");
+ tmp = tmp.replace("%IN"+(j+1)+"i%", varj+"ix");
+ tmp = tmp.replace("%IN"+(j+1)+"%",
+ varj.startsWith("b") ? varj + ".ddat" : varj );
//replace start position of main input
- tmp = tmp.replace("%POS"+j+"%", (_inputs.get(j-1) instanceof CNodeData
- && _inputs.get(j-1).getDataType().isMatrix()) ? (!varj.startsWith("b")) ?
- varj+"i" : TemplateUtils.isMatrix(_inputs.get(j-1)) ? "rowIndex*len" : "0" : "0");
+ tmp = tmp.replace("%POS"+(j+1)+"%", (_inputs.get(j) instanceof CNodeData
+ && _inputs.get(j).getDataType().isMatrix()) ? (!varj.startsWith("b")) ? varj+"i" :
+ (TemplateUtils.isMatrix(_inputs.get(j)) && _type!=BinType.VECT_MATRIXMULT) ?
+ "rowIndex*"+((_type==BinType.VECT_OUTERMULT_ADD)?"%LEN"+(j+1)+"%":"%LEN%") : "0" : "0");
+ }
+ //replace length information (e.g., after matrix mult)
+ if( _type == BinType.VECT_OUTERMULT_ADD ) {
+ for( int j=0; j<2; j++ )
+ tmp = tmp.replace("%LEN"+(j+1)+"%", _inputs.get(j).getVectorLength());
+ }
+ else { //general case
+ CNode mInput = getIntermediateInputVector();
+ if( mInput != null )
+ tmp = tmp.replace("%LEN%", mInput.getVectorLength());
}
+
sb.append(tmp);
//mark as generated
@@ -278,10 +303,19 @@ public class CNodeBinary extends CNode
return sb.toString();
}
+ private CNode getIntermediateInputVector() {
+ for( int i=0; i<2; i++ )
+ if( getInput().get(i).getDataType().isMatrix() )
+ return getInput().get(i);
+ return null;
+ }
+
@Override
public String toString() {
switch(_type) {
case DOT_PRODUCT: return "b(dot)";
+ case VECT_MATRIXMULT: return "b(vmm)";
+ case VECT_OUTERMULT_ADD: return "b(voma)";
case VECT_MULT_ADD: return "b(vma)";
case VECT_DIV_ADD: return "b(vda)";
case VECT_MINUS_ADD: return "b(vmia)";
@@ -362,7 +396,13 @@ public class CNodeBinary extends CNode
boolean vectorScalar = _inputs.get(1).getDataType()==DataType.SCALAR;
_rows = _inputs.get(vectorScalar ? 0 : 1)._rows;
_cols = _inputs.get(vectorScalar ? 0 : 1)._cols;
- _dataType= DataType.MATRIX;
+ _dataType = DataType.MATRIX;
+ break;
+
+ case VECT_OUTERMULT_ADD:
+ _rows = _inputs.get(0)._cols;
+ _cols = _inputs.get(1)._cols;
+ _dataType = DataType.MATRIX;
break;
case VECT_DIV_SCALAR:
@@ -396,8 +436,13 @@ public class CNodeBinary extends CNode
_cols = _inputs.get(scalarVector ? 1 : 0)._cols;
_dataType= DataType.MATRIX;
break;
+
+ case VECT_MATRIXMULT:
+ _rows = _inputs.get(0)._rows;
+ _cols = _inputs.get(1)._cols;
+ _dataType = DataType.MATRIX;
+ break;
-
case DOT_PRODUCT:
//SCALAR Arithmetic
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
index d6a1d34..01ca08e 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
@@ -39,10 +39,10 @@ public class CNodeOuterProduct extends CNodeTpl
+ " public %TMP%() {\n"
+ " _outerProductType = OutProdType.%TYPE%;\n"
+ " }\n"
- + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int k, int rowIndex, int colIndex) { \n"
+ + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rowIndex, int colIndex) { \n"
+ "%BODY_dense%"
+ " }\n"
- + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int k, int rowIndex, int colIndex) { \n"
+ + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int len, int rowIndex, int colIndex) { \n"
+ "%BODY_cellwise%"
+ " return %OUT_cellwise%;\n"
+ " }\n"
@@ -86,7 +86,7 @@ public class CNodeOuterProduct extends CNodeTpl
tmp = tmp.replace("%OUT_cellwise%", getCurrentVarName());
}
//replace size information
- tmp = tmp.replace("%LEN%", "k");
+ tmp = tmp.replace("%LEN%", "len");
tmp = tmp.replace("%POSOUT%", "ci");
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
index 7cba5f7..b74b79d 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
@@ -22,6 +22,7 @@ package org.apache.sysml.hops.codegen.cplan;
import java.util.ArrayList;
import org.apache.sysml.hops.codegen.SpoofFusedOp.SpoofOutputDimsType;
+import org.apache.sysml.hops.codegen.cplan.CNodeBinary.BinType;
import org.apache.sysml.hops.codegen.cplan.CNodeUnary.UnaryType;
import org.apache.sysml.hops.codegen.template.TemplateUtils;
import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType;
@@ -32,25 +33,26 @@ public class CNodeRow extends CNodeTpl
private static final String TEMPLATE =
"package codegen;\n"
+ "import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;\n"
+ + "import org.apache.sysml.runtime.codegen.SpoofOperator.SideInput;\n"
+ "import org.apache.sysml.runtime.codegen.SpoofRowwise;\n"
+ "import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType;\n"
+ "import org.apache.commons.math3.util.FastMath;\n"
+ "\n"
+ "public final class %TMP% extends SpoofRowwise { \n"
+ " public %TMP%() {\n"
- + " super(RowType.%TYPE%, %CBIND0%, %VECT_MEM%);\n"
+ + " super(RowType.%TYPE%, %CBIND0%, %TB1%, %VECT_MEM%);\n"
+ " }\n"
- + " protected void genexec(double[] a, int ai, double[][] b, double[] scalars, double[] c, int len, int rowIndex) { \n"
+ + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int len, int rowIndex) { \n"
+ "%BODY_dense%"
+ " }\n"
- + " protected void genexec(double[] avals, int[] aix, int ai, double[][] b, double[] scalars, double[] c, int alen, int len, int rowIndex) { \n"
+ + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int alen, int len, int rowIndex) { \n"
+ "%BODY_sparse%"
+ " }\n"
+ "}\n";
private static final String TEMPLATE_ROWAGG_OUT = " c[rowIndex] = %IN%;\n";
private static final String TEMPLATE_FULLAGG_OUT = " c[0] += %IN%;\n";
- private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, rowIndex*len, len);\n";
+ private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, rowIndex*%LEN%, %LEN%);\n";
public CNodeRow(ArrayList<CNode> inputs, CNode output ) {
super(inputs, output);
@@ -59,14 +61,6 @@ public class CNodeRow extends CNodeTpl
private RowType _type = null; //access pattern
private int _numVectors = -1; //number of intermediate vectors
- public void setNumVectorIntermediates(int num) {
- _numVectors = num;
- }
-
- public int getNumVectorIntermediates() {
- return _numVectors;
- }
-
public void setRowType(RowType type) {
_type = type;
_hash = 0;
@@ -76,6 +70,15 @@ public class CNodeRow extends CNodeTpl
return _type;
}
+ public void setNumVectorIntermediates(int num) {
+ _numVectors = num;
+ _hash = 0;
+ }
+
+ public int getNumVectorIntermediates() {
+ return _numVectors;
+ }
+
@Override
public void renameInputs() {
rRenameDataNode(_output, _inputs.get(0), "a"); // input matrix
@@ -108,18 +111,26 @@ public class CNodeRow extends CNodeTpl
tmp = tmp.replace("%TYPE%", _type.name());
tmp = tmp.replace("%CBIND0%", String.valueOf(
TemplateUtils.isUnary(_output, UnaryType.CBIND0)));
+ tmp = tmp.replace("%TB1%", String.valueOf(
+ TemplateUtils.containsBinary(_output, BinType.VECT_MATRIXMULT)));
tmp = tmp.replace("%VECT_MEM%", String.valueOf(_numVectors));
return tmp;
}
private String getOutputStatement(String varName) {
- if( !_type.isColumnAgg() ) {
- String tmp = (_type==RowType.NO_AGG) ? TEMPLATE_NOAGG_OUT :
- (_type==RowType.FULL_AGG) ? TEMPLATE_FULLAGG_OUT : TEMPLATE_ROWAGG_OUT;
- return tmp.replace("%IN%", varName);
+ switch( _type ) {
+ case NO_AGG:
+ case NO_AGG_B1:
+ return TEMPLATE_NOAGG_OUT.replace("%IN%", varName)
+ .replace("%LEN%", _output.getVarname()+".length");
+ case FULL_AGG:
+ return TEMPLATE_FULLAGG_OUT.replace("%IN%", varName);
+ case ROW_AGG:
+ return TEMPLATE_ROWAGG_OUT.replace("%IN%", varName);
+ default:
+ return ""; //_type.isColumnAgg()
}
- return "";
}
@Override
@@ -131,12 +142,15 @@ public class CNodeRow extends CNodeTpl
@Override
public SpoofOutputDimsType getOutputDimType() {
switch( _type ) {
- case NO_AGG: return SpoofOutputDimsType.INPUT_DIMS;
- case FULL_AGG: return SpoofOutputDimsType.SCALAR;
- case ROW_AGG: return TemplateUtils.isUnary(_output, UnaryType.CBIND0) ?
- SpoofOutputDimsType.ROW_DIMS2 : SpoofOutputDimsType.ROW_DIMS;
- case COL_AGG: return SpoofOutputDimsType.COLUMN_DIMS_COLS; //row vector
+ case NO_AGG: return SpoofOutputDimsType.INPUT_DIMS;
+ case NO_AGG_B1: return SpoofOutputDimsType.ROW_RANK_DIMS;
+ case FULL_AGG: return SpoofOutputDimsType.SCALAR;
+ case ROW_AGG: return TemplateUtils.isUnary(_output, UnaryType.CBIND0) ?
+ SpoofOutputDimsType.ROW_DIMS2 : SpoofOutputDimsType.ROW_DIMS;
+ case COL_AGG: return SpoofOutputDimsType.COLUMN_DIMS_COLS; //row vector
case COL_AGG_T: return SpoofOutputDimsType.COLUMN_DIMS_ROWS; //column vector
+ case COL_AGG_B1: return SpoofOutputDimsType.COLUMN_RANK_DIMS;
+ case COL_AGG_B1_T: return SpoofOutputDimsType.COLUMN_RANK_DIMS_T;
default:
throw new RuntimeException("Unsupported row type: "+_type.toString());
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index 500b309..85800b8 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -185,6 +185,10 @@ public class CNodeUnary extends CNode
tmp = tmp.replace("%POS1%", spos);
tmp = tmp.replace("%POS2%", spos);
+ //replace length
+ if( _inputs.get(0).getDataType().isMatrix() )
+ tmp = tmp.replace("%LEN%", _inputs.get(0).getVectorLength());
+
sb.append(tmp);
//mark as generated
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 601d664..c0c8c4e 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -50,6 +50,7 @@ import org.apache.sysml.hops.Hop.Direction;
import org.apache.sysml.hops.Hop.OpOp1;
import org.apache.sysml.hops.Hop.OpOp2;
import org.apache.sysml.parser.Expression.DataType;
+import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
import org.apache.sysml.runtime.matrix.data.Pair;
public class TemplateRow extends TemplateBase
@@ -73,8 +74,17 @@ public class TemplateRow extends TemplateBase
public boolean open(Hop hop) {
return (hop instanceof BinaryOp && hop.dimsKnown() && isValidBinaryOperation(hop)
&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1)
- || (hop instanceof AggBinaryOp && hop.dimsKnown() && hop.getDim2()==1
+ || (hop instanceof AggBinaryOp && hop.dimsKnown() && hop.getDim2()==1 //MV
&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1)
+ || (hop instanceof AggBinaryOp && hop.dimsKnown() && LibMatrixMult.isSkinnyRightHandSide(
+ hop.getInput().get(0).getDim1(), hop.getInput().get(0).getDim2(), //MM
+ hop.getInput().get(1).getDim1(), hop.getInput().get(1).getDim2())
+ && hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1
+ && !HopRewriteUtils.isOuterProductLikeMM(hop))
+ || (HopRewriteUtils.isTransposeOperation(hop) && hop.getParent().size()==1
+ && hop.getParent().get(0) instanceof AggBinaryOp && hop.getParent().get(0).dimsKnown()
+ && hop.getParent().get(0).getInput().indexOf(hop) == 0
+ && isFuseSkinnyMatrixMult(hop.getParent().get(0)))
|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()!=Direction.RowCol
&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1
&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG));
@@ -88,20 +98,24 @@ public class TemplateRow extends TemplateBase
&& input.getDim2()==1 && hop.getInput().get(1).getDim2()==1
&& HopRewriteUtils.isEmpty(hop.getInput().get(1)))
|| ((hop instanceof UnaryOp || hop instanceof ParameterizedBuiltinOp)
- && TemplateCell.isValidOperation(hop))
+ && TemplateCell.isValidOperation(hop))
|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()!=Direction.RowCol
&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG))
|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection() == Direction.RowCol
&& ((AggUnaryOp)hop).getOp() == AggOp.SUM )
- || (hop instanceof AggBinaryOp && hop.getDim1()>1 && hop.getDim2()==1
- && HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))));
+ || (hop instanceof AggBinaryOp && hop.getDim1()>1 && hop.getDim2()==1 //MV
+ && HopRewriteUtils.isTransposeOperation(hop.getInput().get(0)))
+ || (hop instanceof AggBinaryOp && hop.dimsKnown() && isFuseSkinnyMatrixMult(hop) //MM
+ && HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
+ && hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1));
}
@Override
public boolean merge(Hop hop, Hop input) {
//merge rowagg tpl with cell tpl if input is a vector
return !isClosed() &&
- ((hop instanceof BinaryOp && isValidBinaryOperation(hop))
+ ((hop instanceof BinaryOp && isValidBinaryOperation(hop)
+ && hop.getDim1() > 1 && input.getDim1()>1)
||(hop instanceof AggBinaryOp && input.getDim2()==1
&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))));
}
@@ -117,12 +131,18 @@ public class TemplateRow extends TemplateBase
return CloseType.OPEN;
}
- private boolean isValidBinaryOperation(Hop hop) {
- //exclude unsupported and matrix-rowvector ops
- return TemplateUtils.isOperationSupported(hop)
- && (HopRewriteUtils.isBinaryMatrixScalarOperation(hop)
- || HopRewriteUtils.isBinaryMatrixColVectorOperation(hop)
- || HopRewriteUtils.isBinaryMatrixMatrixOperation(hop));
+ private static boolean isValidBinaryOperation(Hop hop) {
+ //support for matrix-scalar, matrix-col_vector,
+ //matrix-row_vector, and matrix-matrix
+ return TemplateUtils.isOperationSupported(hop);
+ }
+
+ private static boolean isFuseSkinnyMatrixMult(Hop hop) {
+ //check for fusable but not opening matrix multiply (vect_outer-mult)
+ Hop in1 = hop.getInput().get(0); //transpose
+ Hop in2 = hop.getInput().get(1);
+ return LibMatrixMult.isSkinnyRightHandSide(in1.getDim2(), in1.getDim1(), hop.getDim1(), hop.getDim2())
+ || LibMatrixMult.isSkinnyRightHandSide(in2.getDim1(), in2.getDim2(), hop.getDim2(), hop.getDim1());
}
@Override
@@ -138,7 +158,7 @@ public class TemplateRow extends TemplateBase
//reorder inputs (ensure matrix is first input, and other inputs ordered by size)
Hop[] sinHops = inHops.stream()
.filter(h -> !(h.getDataType().isScalar() && tmp.get(h.getHopID()).isLiteral()))
- .sorted(new HopInputComparator(inHops2.get("X"))).toArray(Hop[]::new);
+ .sorted(new HopInputComparator(inHops2.get("X"),inHops2.get("B1"))).toArray(Hop[]::new);
//construct template node
ArrayList<CNode> inputs = new ArrayList<CNode>();
@@ -146,7 +166,8 @@ public class TemplateRow extends TemplateBase
inputs.add(tmp.get(in.getHopID()));
CNode output = tmp.get(hop.getHopID());
CNodeRow tpl = new CNodeRow(inputs, output);
- tpl.setRowType(TemplateUtils.getRowType(hop, sinHops[0]));
+ tpl.setRowType(TemplateUtils.getRowType(hop,
+ inHops2.get("X"), inHops2.get("B1")));
tpl.setNumVectorIntermediates(TemplateUtils
.determineMinVectorIntermediates(output));
tpl.getOutput().resetVisitStatus();
@@ -217,7 +238,13 @@ public class TemplateRow extends TemplateBase
inHops.add(hop.getInput().get(0).getInput().get(0));
//note: vectorMultAdd applicable to vector-scalar, and vector-vector
- out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD);
+ if( hop.getInput().get(1).getDim2() == 1 )
+ out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD);
+ else {
+ out = new CNodeBinary(cdata1, cdata2, BinType.VECT_OUTERMULT_ADD);
+ if( !inHops2.containsKey("B1") )
+ inHops2.put("B1", hop.getInput().get(1));
+ }
inHops2.put("X", hop.getInput().get(0).getInput().get(0));
}
else
@@ -225,12 +252,24 @@ public class TemplateRow extends TemplateBase
if(hop.getInput().get(0).getDim2()==1 && hop.getInput().get(1).getDim2()==1)
out = new CNodeBinary((cdata1.getDataType()==DataType.SCALAR)? cdata1 : new CNodeUnary(cdata1, UnaryType.LOOKUP0),
(cdata2.getDataType()==DataType.SCALAR)? cdata2 : new CNodeUnary(cdata2, UnaryType.LOOKUP0), BinType.MULT);
- else {
+ else if( hop.getInput().get(1).getDim2()==1 ) {
out = new CNodeBinary(cdata1, cdata2, BinType.DOT_PRODUCT);
inHops2.put("X", hop.getInput().get(0));
}
+ else {
+ out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MATRIXMULT);
+ inHops2.put("X", hop.getInput().get(0));
+ inHops2.put("B1", hop.getInput().get(1));
+ }
}
}
+ else if( HopRewriteUtils.isTransposeOperation(hop) )
+ {
+ out = TemplateUtils.skipTranspose(tmp.get(hop.getHopID()),
+ hop, tmp, compileLiterals);
+ if( out instanceof CNodeData && !inHops.contains(hop.getInput().get(0)) )
+ inHops.add(hop.getInput().get(0));
+ }
else if(hop instanceof UnaryOp)
{
CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
@@ -272,7 +311,8 @@ public class TemplateRow extends TemplateBase
|| (hop.getInput().get(1).getDim1() > 1 && hop.getInput().get(1).getDim2() > 1))
{
if( HopRewriteUtils.isBinary(hop, SUPPORTED_VECT_BINARY) ) {
- if( TemplateUtils.isMatrix(cdata1) && TemplateUtils.isMatrix(cdata2) ) {
+ if( TemplateUtils.isMatrix(cdata1) && (TemplateUtils.isMatrix(cdata2)
+ || TemplateUtils.isRowVector(cdata2)) ) {
String opname = "VECT_"+((BinaryOp)hop).getOp().name();
out = new CNodeBinary(cdata1, cdata2, BinType.valueOf(opname));
}
@@ -360,19 +400,21 @@ public class TemplateRow extends TemplateBase
public static class HopInputComparator implements Comparator<Hop>
{
private final Hop _X;
+ private final Hop _B1;
- public HopInputComparator(Hop X) {
+ public HopInputComparator(Hop X, Hop B1) {
_X = X;
+ _B1 = B1;
}
@Override
public int compare(Hop h1, Hop h2) {
long ncells1 = h1.getDataType()==DataType.SCALAR ? Long.MIN_VALUE :
- (h1==_X) ? Long.MAX_VALUE :
- h1.dimsKnown() ? h1.getDim1()*h1.getDim2() : Long.MAX_VALUE-1;
+ (h1==_X) ? Long.MAX_VALUE : (h1==_B1) ? Long.MAX_VALUE-1 :
+ h1.dimsKnown() ? h1.getDim1()*h1.getDim2() : Long.MAX_VALUE-2;
long ncells2 = h2.getDataType()==DataType.SCALAR ? Long.MIN_VALUE :
- (h2==_X) ? Long.MAX_VALUE :
- h2.dimsKnown() ? h2.getDim1()*h2.getDim2() : Long.MAX_VALUE-1;
+ (h2==_X) ? Long.MAX_VALUE : (h2==_B1) ? Long.MAX_VALUE-1 :
+ h2.dimsKnown() ? h2.getDim1()*h2.getDim2() : Long.MAX_VALUE-2;
return (ncells1 > ncells2) ? -1 : (ncells1 < ncells2) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index da803cd..4bd5bf1 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -19,11 +19,7 @@
package org.apache.sysml.hops.codegen.template;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.hops.AggBinaryOp;
@@ -142,74 +138,7 @@ public class TemplateUtils
return TernaryType.contains(((ParameterizedBuiltinOp)h).getOp().name());
return false;
}
-
- private static void rfindChildren(Hop hop, HashSet<Hop> children ) {
- if( hop instanceof UnaryOp || (hop instanceof BinaryOp && hop.getInput().get(0).getDataType() == DataType.MATRIX && TemplateUtils.isVectorOrScalar( hop.getInput().get(1))) || (hop instanceof BinaryOp && TemplateUtils.isVectorOrScalar( hop.getInput().get(0)) && hop.getInput().get(1).getDataType() == DataType.MATRIX) //unary operation or binary operaiton with one matrix and a scalar
- && hop.getDataType() == DataType.MATRIX )
- {
- if(!children.contains(hop))
- children.add(hop);
- Hop matrix = TemplateUtils.isMatrix(hop.getInput().get(0)) ? hop.getInput().get(0) : hop.getInput().get(1);
- rfindChildren(matrix,children);
- }
- else
- children.add(hop);
- }
- private static Hop findCommonChild(Hop hop1, Hop hop2) {
- //this method assumes that each two nodes have at most one common child
- LinkedHashSet<Hop> children1 = new LinkedHashSet<Hop>();
- LinkedHashSet<Hop> children2 = new LinkedHashSet<Hop>();
-
- rfindChildren(hop1, children1 );
- rfindChildren(hop2, children2 );
-
- //iterate on one set and find the first common child in the other set
- Iterator<Hop> iter = children1.iterator();
- while (iter.hasNext()) {
- Hop candidate = iter.next();
- if(children2.contains(candidate))
- return candidate;
- }
- return null;
- }
-
- public static Hop commonChild(ArrayList<Hop> _adddedMatrices, Hop input) {
- Hop currentChild = null;
- //loop on every added matrix and find its common child with the input, if all of them have the same common child then return it, otherwise null
- for(Hop addedMatrix : _adddedMatrices)
- {
- Hop child = findCommonChild(addedMatrix,input);
- if(child == null) // did not find a common child
- return null;
- if(currentChild == null) // first common child to be seen
- currentChild = child;
- else if(child.getHopID() != currentChild.getHopID())
- return null;
- }
- return currentChild;
- }
-
- public static HashSet<Long> rGetInputHopIDs( CNode node, HashSet<Long> ids ) {
- if( node instanceof CNodeData && !node.isLiteral() )
- ids.add(((CNodeData)node).getHopID());
-
- for( CNode c : node.getInput() )
- rGetInputHopIDs(c, ids);
-
- return ids;
- }
-
- public static Hop[] mergeDistinct(HashSet<Long> ids, Hop[] input1, Hop[] input2) {
- Hop[] ret = new Hop[ids.size()];
- int pos = 0;
- for( Hop[] input : new Hop[][]{input1, input2} )
- for( Hop c : input )
- if( ids.contains(c.getHopID()) )
- ret[pos++] = c;
- return ret;
- }
-
public static TemplateBase createTemplate(TemplateType type) {
return createTemplate(type, false);
}
@@ -242,21 +171,31 @@ public class TemplateUtils
CellType.FULL_AGG : CellType.ROW_AGG) : CellType.NO_AGG;
}
- public static RowType getRowType(Hop output, Hop input) {
- if( HopRewriteUtils.isEqualSize(output, input) )
+ public static RowType getRowType(Hop output, Hop... inputs) {
+ Hop X = inputs[0];
+ Hop B1 = (inputs.length>1) ? inputs[1] : null;
+ if( HopRewriteUtils.isEqualSize(output, X) )
return RowType.NO_AGG;
- else if( output.getDim1()==input.getDim1() && (output.getDim2()==1
+ else if( B1 != null && output.getDim1()==X.getDim1() && output.getDim2()==B1.getDim2() )
+ return RowType.NO_AGG_B1;
+ else if( output.getDim1()==X.getDim1() && (output.getDim2()==1
|| HopRewriteUtils.isBinary(output, OpOp2.CBIND))
&& !(output instanceof AggBinaryOp && HopRewriteUtils
- .isTransposeOfItself(output.getInput().get(0),input)))
+ .isTransposeOfItself(output.getInput().get(0),X)))
return RowType.ROW_AGG;
else if( output instanceof AggUnaryOp
&& ((AggUnaryOp)output).getDirection()==Direction.RowCol )
return RowType.FULL_AGG;
- else if( output.getDim1()==input.getDim2() && output.getDim2()==1 )
+ else if( output.getDim1()==X.getDim2() && output.getDim2()==1 )
return RowType.COL_AGG_T;
- else
+ else if( output.getDim1()==1 && output.getDim2()==X.getDim2() )
return RowType.COL_AGG;
+ else if( B1 != null && output.getDim1()==X.getDim2() && output.getDim2()==B1.getDim2() )
+ return RowType.COL_AGG_B1_T;
+ else if( B1 != null && output.getDim1()==B1.getDim2() && output.getDim2()==X.getDim2())
+ return RowType.COL_AGG_B1;
+ else
+ throw new RuntimeException("Unknown row type.");
}
public static AggOp getAggOp(Hop hop) {
@@ -293,6 +232,11 @@ public class TemplateUtils
&& ArrayUtils.contains(types, ((CNodeUnary)node).getType());
}
+ public static boolean isBinary(CNode node, BinType...types) {
+ return node instanceof CNodeBinary
+ && ArrayUtils.contains(types, ((CNodeBinary)node).getType());
+ }
+
public static boolean isTernary(CNode node, TernaryType...types) {
return node instanceof CNodeTernary
&& ArrayUtils.contains(types, ((CNodeTernary)node).getType());
@@ -333,7 +277,8 @@ public class TemplateUtils
CNode output = tpl.getOutput();
return ((output instanceof CNodeUnary
&& !TemplateUtils.isUnary(output, UnaryType.EXP, UnaryType.LOG))
- || output instanceof CNodeBinary)
+ || (output instanceof CNodeBinary
+ && !TemplateUtils.isBinary(output, BinType.VECT_OUTERMULT_ADD)))
&& hasOnlyDataNodeOrLookupInputs(output);
}
@@ -365,8 +310,7 @@ public class TemplateUtils
public static boolean isUnaryOperatorPipeline(CNode node) {
if( node.isVisited() ) {
//second reference to vector intermediate invalidates a unary pipeline
- return !((node instanceof CNodeBinary && ((CNodeBinary)node).getType().isVectorPrimitive())
- || (node instanceof CNodeUnary && ((CNodeUnary)node).getType().isVectorScalarPrimitive()));
+ return !(node instanceof CNodeBinary && ((CNodeBinary)node).getType().isVectorPrimitive());
}
boolean ret = true;
for( CNode input : node.getInput() )
@@ -382,8 +326,9 @@ public class TemplateUtils
for( CNode input : node.getInput() )
max = Math.max(max, getMaxVectorIntermediates(input));
max = Math.max(max, (node instanceof CNodeBinary)?
- ((CNodeBinary)node).getType().isVectorVectorPrimitive() ? 3 :
- ((CNodeBinary)node).getType().isVectorScalarPrimitive() ? 2 : 0 : 0);
+ (((CNodeBinary)node).getType().isVectorVectorPrimitive() ? 3 :
+ ((CNodeBinary)node).getType().isVectorScalarPrimitive() ? 2 :
+ ((CNodeBinary)node).getType().isVectorMatrixPrimitive() ? 1 : 0) : 0);
max = Math.max(max, (node instanceof CNodeUnary
&& ((CNodeUnary)node).getType().isVectorScalarPrimitive()) ? 2 : 0);
node.setVisited();
@@ -432,4 +377,22 @@ public class TemplateUtils
}
return ret;
}
+
+ public static boolean containsBinary(CNode node, BinType type) {
+ node.resetVisitStatus();
+ boolean ret = rContainsBinary(node, type);
+ node.resetVisitStatus();
+ return ret;
+ }
+
+ public static boolean rContainsBinary(CNode node, BinType type) {
+ if( node.isVisited() )
+ return false;
+ boolean ret = false;
+ for( CNode input : node.getInput() )
+ ret |= rContainsBinary(input, type);
+ ret |= isBinary(node, type);
+ node.setVisited();
+ return ret;
+ }
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index ad2530d..1108c08 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -20,6 +20,7 @@
package org.apache.sysml.runtime.codegen;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.LinkedList;
import org.apache.commons.math3.util.FastMath;
@@ -57,6 +58,50 @@ public class LibSpoofPrimitives
return LibMatrixMult.dotProduct(a, b, aix, ai, bi, len);
}
+ public static double[] vectMatrixMult(double[] a, double[] b, int ai, int bi, int len) {
+ //note: assumption b is already transposed for efficient dot products
+ int m2clen = b.length / len;
+ double[] c = allocVector(m2clen, false);
+ for( int j = 0, bix = bi; j < m2clen; j++, bix+=len )
+ c[j] = LibMatrixMult.dotProduct(a, b, ai, bix, len);
+ return c;
+ }
+
+ public static double[] vectMatrixMult(double[] a, double[] b, int[] aix, int ai, int bi, int alen, int len) {
+ //note: assumption b is already transposed for efficient dot products
+ int m2clen = b.length / len;
+ double[] c = allocVector(m2clen, false);
+ for( int j = 0, bix = bi; j < m2clen; j++, bix+=len )
+ c[j] = LibMatrixMult.dotProduct(a, b, aix, ai, bix, alen);
+ return c;
+ }
+
+ public static void vectOuterMultAdd(double[] a, double[] b, double[] c, int ai, int bi, int ci, int len1, int len2) {
+ //rest, not aligned to 4-blocks
+ final int bn = len1%4;
+ for( int i=0, cix=ci; i < bn; i++, cix+=len2 )
+ if( a[ai+i] != 0 )
+ LibMatrixMult.vectMultiplyAdd(a[ai+i], b, c, bi, cix, len2);
+
+ //unrolled 4-block (for fewer L1-dcache loads)
+ for( int i=bn, cix=ci+bn*len2; i < len1; i+=4, cix+=4*len2 ) {
+ final int cix1=cix, cix2=cix+len2, cix3=cix+2*len2, cix4=cix+3*len2;
+ final double aval1=a[ai+i], aval2=a[ai+i+1], aval3=a[ai+i+2], aval4=a[ai+i+3];
+ for( int j=0; j<len2; j++ ) {
+ final double bval = b[bi+j];
+ c[cix1 + j] += aval1 * bval;
+ c[cix2 + j] += aval2 * bval;
+ c[cix3 + j] += aval3 * bval;
+ c[cix4 + j] += aval4 * bval;
+ }
+ }
+ }
+
+ public static void vectOuterMultAdd(double[] a, double[] b, double[] c, int[] aix, int ai, int bi, int ci, int alen, int len1, int len2) {
+ for( int i=0; i < alen; i++ )
+ LibMatrixMult.vectMultiplyAdd(a[ai+i], b, c, bi, ci+aix[ai+i]*len2, len2);
+ }
+
public static void vectMultAdd(double[] a, double bval, double[] c, int bi, int ci, int len) {
if( a == null || bval == 0 ) return;
LibMatrixMult.vectMultiplyAdd(bval, a, c, bi, ci, len);
@@ -1227,7 +1272,14 @@ public class LibSpoofPrimitives
//dynamic memory management
public static void setupThreadLocalMemory(int numVectors, int len) {
+ setupThreadLocalMemory(numVectors, len, -1);
+ }
+
+ public static void setupThreadLocalMemory(int numVectors, int len, int len2) {
LinkedList<double[]> list = new LinkedList<double[]>();
+ if( len2 >= 0 )
+ for( int i=0; i<numVectors; i++ )
+ list.addLast(new double[len2]);
for( int i=0; i<numVectors; i++ )
list.addLast(new double[len]);
memPool.set(list);
@@ -1242,24 +1294,29 @@ public class LibSpoofPrimitives
}
private static double[] allocVector(int len, boolean reset, double resetVal) {
- LinkedList<double[]> list = memPool.get();
+ LinkedList<double[]> list = memPool.get();
- //sanity check for missing setup
- if( list.isEmpty() ) {
- double[] tmp = new double[len];
- if( reset && resetVal != 0 )
- Arrays.fill(tmp, resetVal);
- return tmp;
+ //find and remove vector with matching len
+ double[] vect = null;
+ Iterator<double[]> iter = list.iterator();
+ while( iter.hasNext() ) {
+ double[] tmp = iter.next();
+ if( tmp.length == len ) {
+ vect = tmp;
+ iter.remove();
+ break;
+ }
}
- //get and re-queue first entry
- double[] tmp = list.removeFirst();
- list.addLast(tmp);
+ //allocate new vector or re-queue if required
+ if( vect == null )
+ vect = new double[len];
+ else
+ list.addLast(vect);
//reset vector if required
if( reset )
- Arrays.fill(tmp, resetVal);
- return tmp;
+ Arrays.fill(vect, resetVal);
+ return vect;
}
}
-
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index cc8ef69..15de508 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -118,7 +118,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
}
//input preparation
- SideInput[] b = prepInputMatricesAbstract(inputs);
+ SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
@@ -198,7 +198,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
//input preparation
MatrixBlock a = inputs.get(0);
- SideInput[] b = prepInputMatricesAbstract(inputs);
+ SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = a.getNumRows();
final int n = a.getNumColumns();
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
index e7e3b54..c3755d4 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
@@ -91,7 +91,7 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria
setInitialOutputValues(c);
//input preparation
- SideInput[] b = prepInputMatricesAbstract(inputs);
+ SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
index d3bf410..9561fcb 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.LibMatrixReorg;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.runtime.util.UtilFunctions;
@@ -59,72 +60,62 @@ public abstract class SpoofOperator implements Serializable
return execute(inputs, scalars);
}
- protected double[][] prepInputMatricesDense(ArrayList<MatrixBlock> inputs)
- throws DMLRuntimeException
- {
- return prepInputMatricesDense(inputs, 1, inputs.size()-1);
+ protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs) throws DMLRuntimeException {
+ return prepInputMatrices(inputs, 1, inputs.size()-1, false, false);
}
- protected double[][] prepInputMatricesDense(ArrayList<MatrixBlock> inputs, int offset)
- throws DMLRuntimeException
- {
- return prepInputMatricesDense(inputs, offset, inputs.size()-offset);
+ protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs, boolean denseOnly) throws DMLRuntimeException {
+ return prepInputMatrices(inputs, 1, inputs.size()-1, denseOnly, false);
}
- protected double[][] prepInputMatricesDense(ArrayList<MatrixBlock> inputs, int offset, int len)
- throws DMLRuntimeException
- {
- double[][] b = new double[len][];
- for(int i=offset; i<offset+len; i++) {
- if( inputs.get(i) instanceof CompressedMatrixBlock )
- inputs.set(i, ((CompressedMatrixBlock)inputs.get(i)).decompress());
-
- //convert empty or sparse to dense temporary block (note: we don't do
- //this in place because this block might be used by multiple threads)
- if( inputs.get(i).isInSparseFormat() && inputs.get(i).isAllocated() ) {
- MatrixBlock tmp = inputs.get(i);
- b[i-offset] = DataConverter.convertToDoubleVector(tmp);
- LOG.warn(getClass().getName()+": Converted "+tmp.getNumRows()+"x"+tmp.getNumColumns()+
- ", nnz="+tmp.getNonZeros()+" sideways input matrix from sparse to dense.");
- }
- //use existing dense block
- else {
- b[i-offset] = inputs.get(i).getDenseBlock();
- }
- }
-
- return b;
+ protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs, int offset, boolean denseOnly) throws DMLRuntimeException {
+ return prepInputMatrices(inputs, offset, inputs.size()-offset, denseOnly, false);
}
- protected SideInput[] prepInputMatricesAbstract(ArrayList<MatrixBlock> inputs)
- throws DMLRuntimeException
- {
- return prepInputMatricesAbstract(inputs, 1, inputs.size()-1);
- }
-
- protected SideInput[] prepInputMatricesAbstract(ArrayList<MatrixBlock> inputs, int offset)
- throws DMLRuntimeException
- {
- return prepInputMatricesAbstract(inputs, offset, inputs.size()-offset);
+ protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs, boolean denseOnly, boolean tB1) throws DMLRuntimeException {
+ return prepInputMatrices(inputs, 1, inputs.size()-1, denseOnly, tB1);
}
- protected SideInput[] prepInputMatricesAbstract(ArrayList<MatrixBlock> inputs, int offset, int len)
+ protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs, int offset, int len, boolean denseOnly, boolean tB1)
throws DMLRuntimeException
{
SideInput[] b = new SideInput[len];
for(int i=offset; i<offset+len; i++) {
+ //decompress if necessary
if( inputs.get(i) instanceof CompressedMatrixBlock )
inputs.set(i, ((CompressedMatrixBlock)inputs.get(i)).decompress());
+ //transpose if necessary
+ int clen = inputs.get(i).getNumColumns();
+ MatrixBlock in = (tB1 && i==1 ) ? LibMatrixReorg.transpose(inputs.get(i),
+ new MatrixBlock(clen, inputs.get(i).getNumRows(), false)) : inputs.get(i);
- if( inputs.get(i).isInSparseFormat() && inputs.get(i).isAllocated() )
- b[i-offset] = new SideInput(null, inputs.get(i));
- else
- b[i-offset] = new SideInput(inputs.get(i).getDenseBlock(), null);
+ //create side input
+ if( denseOnly && (in.isInSparseFormat() || !in.isAllocated()) ) {
+ //convert empty or sparse to dense temporary block (note: we don't do
+ //this in place because this block might be used by multiple threads)
+ b[i-offset] = new SideInput(DataConverter.convertToDoubleVector(in), null, clen);
+ LOG.warn(getClass().getName()+": Converted "+in.getNumRows()+"x"+in.getNumColumns()+
+ ", nnz="+in.getNonZeros()+" sideways input matrix from sparse to dense.");
+ }
+ else if( in.isInSparseFormat() && in.isAllocated() ) {
+ b[i-offset] = new SideInput(null, in, clen);
+ }
+ else {
+ b[i-offset] = new SideInput(
+ in.getDenseBlock(), null, clen);
+ }
}
return b;
}
+ public double[][] getDenseMatrices(SideInput[] inputs) {
+ double[][] ret = new double[inputs.length][];
+ for( int i=0; i<inputs.length; i++ )
+ ret[i] = inputs[i].ddat;
+ return ret;
+ }
+
protected double[] prepInputScalars(ArrayList<ScalarObject> scalarObjects) {
double[] scalars = new double[scalarObjects.size()];
for(int i=0; i < scalarObjects.size(); i++)
@@ -161,8 +152,8 @@ public abstract class SpoofOperator implements Serializable
protected static double getValue(SideInput data, int rowIndex) {
//note: wrapper sideinput guaranteed to exist
- return (data.dBlock!=null) ? data.dBlock[rowIndex] :
- (data.mBlock!=null) ? data.mBlock.quickGetValue(rowIndex, 0) : 0;
+ return (data.ddat!=null) ? data.ddat[rowIndex] :
+ (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, 0) : 0;
}
protected static double getValue(SideInput data, int n, double rowIndex, double colIndex) {
@@ -173,17 +164,19 @@ public abstract class SpoofOperator implements Serializable
protected static double getValue(SideInput data, int n, int rowIndex, int colIndex) {
//note: wrapper sideinput guaranteed to exist
- return (data.dBlock!=null) ? data.dBlock[rowIndex*n+colIndex] :
- (data.mBlock!=null) ? data.mBlock.quickGetValue(rowIndex, colIndex) : 0;
+ return (data.ddat!=null) ? data.ddat[rowIndex*n+colIndex] :
+ (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, colIndex) : 0;
}
public static class SideInput {
- private final double[] dBlock;
- private final MatrixBlock mBlock;
-
- public SideInput(double[] ddata, MatrixBlock mdata) {
- dBlock = ddata;
- mBlock = mdata;
+ public final double[] ddat;
+ public final MatrixBlock mdat;
+ public final int clen;
+
+ public SideInput(double[] ddata, MatrixBlock mdata, int clength) {
+ ddat = ddata;
+ mdat = mdata;
+ clen = clength;
}
}
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index c66d065..90c7507 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -79,8 +79,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator
return new DoubleObject(0);
//input preparation
- double[][] ab = prepInputMatricesDense(inputs, 1, 2);
- double[][] b = prepInputMatricesDense(inputs, 3);
+ double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
+ double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -112,8 +112,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator
return new DoubleObject(0);
//input preparation
- double[][] ab = prepInputMatricesDense(inputs, 1, 2);
- double[][] b = prepInputMatricesDense(inputs, 3);
+ double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
+ double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -179,8 +179,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator
out.allocateDenseOrSparseBlock();
//input preparation
- double[][] ab = prepInputMatricesDense(inputs, 1, 2);
- double[][] b = prepInputMatricesDense(inputs, 3);
+ double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
+ double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -257,8 +257,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator
}
//input preparation
- double[][] ab = prepInputMatricesDense(inputs, 1, 2);
- double[][] b = prepInputMatricesDense(inputs, 3);
+ double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
+ double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 611e4ad..13536d3 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -46,23 +46,32 @@ public abstract class SpoofRowwise extends SpoofOperator
public enum RowType {
NO_AGG, //no aggregation
+ NO_AGG_B1, //no aggregation w/ matrix mult B1
FULL_AGG, //full row/col aggregation
ROW_AGG, //row aggregation (e.g., rowSums() or X %*% v)
COL_AGG, //col aggregation (e.g., colSums() or t(y) %*% X)
- COL_AGG_T; //transposed col aggregation (e.g., t(X) %*% y)
+ COL_AGG_T, //transposed col aggregation (e.g., t(X) %*% y)
+ COL_AGG_B1, //col aggregation w/ matrix mult B1
+ COL_AGG_B1_T; //transposed col aggregation w/ matrix mult B1
public boolean isColumnAgg() {
- return (this == COL_AGG || this == COL_AGG_T);
+ return (this == COL_AGG || this == COL_AGG_T)
+ || (this == COL_AGG_B1) || (this == COL_AGG_B1_T);
}
+ public boolean isRowTypeB1() {
+ return (this == NO_AGG_B1) || (this == COL_AGG_B1) || (this == COL_AGG_B1_T);
+ }
}
protected final RowType _type;
protected final boolean _cbind0;
+ protected final boolean _tB1;
protected final int _reqVectMem;
- public SpoofRowwise(RowType type, boolean cbind0, int reqVectMem) {
+ public SpoofRowwise(RowType type, boolean cbind0, boolean tB1, int reqVectMem) {
_type = type;
_cbind0 = cbind0;
+ _tB1 = tB1;
_reqVectMem = reqVectMem;
}
@@ -112,17 +121,18 @@ public abstract class SpoofRowwise extends SpoofOperator
//result allocation and preparations
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
+ final int n2 = _type.isRowTypeB1() ? inputs.get(1).getNumColumns() : -1;
if( !aggIncr || !out.isAllocated() )
- allocateOutputMatrix(m, n, out);
+ allocateOutputMatrix(m, n, n2, out);
double[] c = out.getDenseBlock();
//input preparation
- double[][] b = prepInputMatricesDense(inputs);
+ SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, true, _tB1);
double[] scalars = prepInputScalars(scalarObjects);
//setup thread-local memory if necessary
if( allocTmp )
- LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n);
+ LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);
//core sequential execute
MatrixBlock a = inputs.get(0);
@@ -157,10 +167,11 @@ public abstract class SpoofRowwise extends SpoofOperator
//result allocation and preparations
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
- allocateOutputMatrix(m, n, out);
+ final int n2 = _type.isRowTypeB1() ? inputs.get(1).getNumColumns() : -1;
+ allocateOutputMatrix(m, n, n2, out);
//input preparation
- double[][] b = prepInputMatricesDense(inputs);
+ SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, true, _tB1);
double[] scalars = prepInputScalars(scalarObjects);
//core parallel execute
@@ -173,10 +184,10 @@ public abstract class SpoofRowwise extends SpoofOperator
//execute tasks
ArrayList<ParColAggTask> tasks = new ArrayList<ParColAggTask>();
for( int i=0; i<nk & i*blklen<m; i++ )
- tasks.add(new ParColAggTask(inputs.get(0), b, scalars, n, i*blklen, Math.min((i+1)*blklen, m)));
+ tasks.add(new ParColAggTask(inputs.get(0), b, scalars, n, n2, i*blklen, Math.min((i+1)*blklen, m)));
List<Future<double[]>> taskret = pool.invokeAll(tasks);
//aggregate partial results
- int len = _type.isColumnAgg() ? n : 1;
+ int len = _type.isColumnAgg() ? out.getNumRows()*out.getNumColumns() : 1;
for( Future<double[]> task : taskret )
LibMatrixMult.vectAdd(task.get(), out.getDenseBlock(), 0, 0, len);
out.recomputeNonZeros();
@@ -185,7 +196,7 @@ public abstract class SpoofRowwise extends SpoofOperator
//execute tasks
ArrayList<ParExecTask> tasks = new ArrayList<ParExecTask>();
for( int i=0; i<nk & i*blklen<m; i++ )
- tasks.add(new ParExecTask(inputs.get(0), b, out, scalars, n, i*blklen, Math.min((i+1)*blklen, m)));
+ tasks.add(new ParExecTask(inputs.get(0), b, out, scalars, n, n2, i*blklen, Math.min((i+1)*blklen, m)));
List<Future<Long>> taskret = pool.invokeAll(tasks);
//aggregate nnz, no need to aggregate results
long nnz = 0;
@@ -202,18 +213,22 @@ public abstract class SpoofRowwise extends SpoofOperator
}
}
- private void allocateOutputMatrix(int m, int n, MatrixBlock out) {
+ private void allocateOutputMatrix(int m, int n, int n2, MatrixBlock out) {
switch( _type ) {
- case NO_AGG: out.reset(m, n, false); break;
- case FULL_AGG: out.reset(1, 1, false); break;
- case ROW_AGG: out.reset(m, 1+(_cbind0?1:0), false); break;
- case COL_AGG: out.reset(1, n, false); break;
- case COL_AGG_T: out.reset(n, 1, false); break;
+ case NO_AGG: out.reset(m, n, false); break;
+ case NO_AGG_B1: out.reset(m, n2, false); break;
+ case FULL_AGG: out.reset(1, 1, false); break;
+ case ROW_AGG: out.reset(m, 1+(_cbind0?1:0), false); break;
+ case COL_AGG: out.reset(1, n, false); break;
+ case COL_AGG_T: out.reset(n, 1, false); break;
+ case COL_AGG_B1: out.reset(n2, n, false); break;
+ case COL_AGG_B1_T: out.reset(n, n2, false); break;
+
}
out.allocateDenseBlock();
}
- private void executeDense(double[] a, double[][] b, double[] scalars, double[] c, int n, int rl, int ru)
+ private void executeDense(double[] a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru)
{
if( a == null )
return;
@@ -224,7 +239,7 @@ public abstract class SpoofRowwise extends SpoofOperator
}
}
- private void executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int n, int rl, int ru)
+ private void executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru)
{
SparseRow empty = new SparseRowVector(1);
for( int i=rl; i<ru; i++ ) {
@@ -243,7 +258,7 @@ public abstract class SpoofRowwise extends SpoofOperator
}
}
- private void executeCompressed(CompressedMatrixBlock a, double[][] b, double[] scalars, double[] c, int n, int rl, int ru)
+ private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru)
{
if( a.isEmptyBlock(false) )
return;
@@ -272,10 +287,10 @@ public abstract class SpoofRowwise extends SpoofOperator
//methods to be implemented by generated operators of type SpoofRowAggrgate
protected abstract void genexec(double[] a, int ai,
- double[][] b, double[] scalars, double[] c, int len, int rowIndex);
+ SideInput[] b, double[] scalars, double[] c, int len, int rowIndex);
protected abstract void genexec(double[] avals, int[] aix, int ai,
- double[][] b, double[] scalars, double[] c, int alen, int n, int rowIndex);
+ SideInput[] b, double[] scalars, double[] c, int alen, int n, int rowIndex);
/**
@@ -284,17 +299,19 @@ public abstract class SpoofRowwise extends SpoofOperator
private class ParColAggTask implements Callable<double[]>
{
private final MatrixBlock _a;
- private final double[][] _b;
+ private final SideInput[] _b;
private final double[] _scalars;
private final int _clen;
+ private final int _clen2;
private final int _rl;
private final int _ru;
- protected ParColAggTask( MatrixBlock a, double[][] b, double[] scalars, int clen, int rl, int ru ) {
+ protected ParColAggTask( MatrixBlock a, SideInput[] b, double[] scalars, int clen, int clen2, int rl, int ru ) {
_a = a;
_b = b;
_scalars = scalars;
_clen = clen;
+ _clen2 = clen2;
_rl = rl;
_ru = ru;
}
@@ -303,8 +320,8 @@ public abstract class SpoofRowwise extends SpoofOperator
public double[] call() throws DMLRuntimeException {
//allocate vector intermediates and partial output
- LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen);
- double[] c = new double[_clen];
+ LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+ double[] c = new double[(_clen2>0)?_clen*_clen2 : _clen];
if( _a instanceof CompressedMatrixBlock )
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru);
@@ -324,19 +341,21 @@ public abstract class SpoofRowwise extends SpoofOperator
private class ParExecTask implements Callable<Long>
{
private final MatrixBlock _a;
- private final double[][] _b;
+ private final SideInput[] _b;
private final MatrixBlock _c;
private final double[] _scalars;
private final int _clen;
+ private final int _clen2;
private final int _rl;
private final int _ru;
- protected ParExecTask( MatrixBlock a, double[][] b, MatrixBlock c, double[] scalars, int clen, int rl, int ru ) {
+ protected ParExecTask( MatrixBlock a, SideInput[] b, MatrixBlock c, double[] scalars, int clen, int clen2, int rl, int ru ) {
_a = a;
_b = b;
_c = c;
_scalars = scalars;
_clen = clen;
+ _clen2 = clen2;
_rl = rl;
_ru = ru;
}
@@ -344,7 +363,7 @@ public abstract class SpoofRowwise extends SpoofOperator
@Override
public Long call() throws DMLRuntimeException {
//allocate vector intermediates
- LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen);
+ LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
if( _a instanceof CompressedMatrixBlock )
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
index 622944d..663e269 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java
@@ -313,7 +313,8 @@ public class SpoofSPInstruction extends SPInstruction
}
//setup local memory for reuse
- LibSpoofPrimitives.setupThreadLocalMemory(_op.getNumIntermediates(), _clen);
+ int clen2 = (int) (_op.getRowType().isRowTypeB1() ? _vectors.get(0).getNumCols() : -1);
+ LibSpoofPrimitives.setupThreadLocalMemory(_op.getNumIntermediates(), _clen, clen2);
ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes,MatrixBlock>>();
boolean aggIncr = (_op.getRowType().isColumnAgg() //aggregate entire partition
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/instructions/spark/data/PartitionedBroadcast.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/PartitionedBroadcast.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/PartitionedBroadcast.java
index 1a7aeb3..c58eb91 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/data/PartitionedBroadcast.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/data/PartitionedBroadcast.java
@@ -54,6 +54,14 @@ public class PartitionedBroadcast<T extends CacheBlock> implements Serializable
public Broadcast<PartitionedBlock<T>>[] getBroadcasts() {
return _pbc;
}
+
+ public long getNumRows() { //value reported by the partitioned block of the first broadcast
+ return _pbc[0].value().getNumRows();
+ }
+
+ public long getNumCols() { //value reported by the partitioned block of the first broadcast
+ return _pbc[0].value().getNumCols();
+ }
public int getNumRowBlocks() {
return _pbc[0].value().getNumRowBlocks();
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 0ed0090..8159dc9 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -42,27 +42,14 @@ import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
import org.apache.sysml.runtime.util.UtilFunctions;
/**
- * MB:
- * Library for matrix multiplications including MM, MV, VV for all
+ * MB: Library for matrix multiplications including MM, MV, VV for all
* combinations of dense, sparse, ultrasparse representations and special
* operations such as transpose-self matrix multiplication.
- *
+ * <p>
* In general all implementations use internally dense outputs
* for direct access, but change the final result to sparse if necessary.
* The only exceptions are ultra-sparse matrix mult, wsloss and wsigmoid.
- *
- * NOTES on BLAS:
- * * Experiments in 04/2013 showed that even on dense-dense this implementation
- * is 3x faster than f2j-BLAS-DGEMM, 2x faster than f2c-BLAS-DGEMM, and
- * level (+10% after JIT) with a native C implementation.
- * * Calling native BLAS would loose platform independence and would require
- * JNI calls incl data transfer. Furthermore, BLAS does not support sparse
- * matrices (except Sparse BLAS, with dedicated function calls and matrix formats)
- * and would be an external dependency.
- * * Experiments in 02/2014 showed that on dense-dense this implementation now achieves
- * almost 30% peak FP performance. Compared to Intel MKL 11.1 (dgemm, N=1000) it is
- * just 3.2x (sparsity=1.0) and 1.9x (sparsity=0.5) slower, respectively.
- *
+ *
*/
public class LibMatrixMult
{
@@ -3065,7 +3052,7 @@ public class LibMatrixMult
c[ ci+7 ] += aval1 * b[ bi1+7 ] + aval2 * b[ bi2+7 ] + aval3 * b[ bi3+7 ] + aval4 * b[ bi4+7 ];
}
}
-
+
@SuppressWarnings("unused")
private static void vectMultiplyAdd( final double aval, double[] b, double[] c, int[] bix, final int ci, final int len )
{
@@ -3492,12 +3479,16 @@ public class LibMatrixMult
return ret;
}
- private static boolean checkPrepMatrixMultRightInput( MatrixBlock m1, MatrixBlock m2 )
- {
+ private static boolean checkPrepMatrixMultRightInput( MatrixBlock m1, MatrixBlock m2 ) {
//transpose if dense-dense, skinny rhs matrix (not vector), and memory guarded by output
return (LOW_LEVEL_OPTIMIZATION && !m1.sparse && !m2.sparse
- && m1.rlen > m2.clen && m2.rlen > 64 && m2.clen > 1 && m2.clen < 64
- && 8*m2.rlen*m2.clen < 256*1024 ); //rhs fits in L2 cache
+ && isSkinnyRightHandSide(m1.rlen, m1.clen, m2.rlen, m2.clen));
+ }
+
+ //note: public for use by codegen for consistency; true if 1 < m2clen < 64, m2clen is below both m1rlen and m2rlen, and 8*m2rlen*m2clen is below L2_CACHESIZE (m1clen is not evaluated)
+ public static boolean isSkinnyRightHandSide(long m1rlen, long m1clen, long m2rlen, long m2clen) {
+ return m1rlen > m2clen && m2rlen > m2clen && m2clen > 1
+ && m2clen < 64 && 8*m2rlen*m2clen < L2_CACHESIZE;
+ }
private static boolean checkParMatrixMultRightInputRows( MatrixBlock m1, MatrixBlock m2, int k ) {
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index 182adf4..e32056a 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -59,6 +59,10 @@ public class RowAggTmplTest extends AutomatedTestBase
private static final String TEST_NAME21 = TEST_NAME+"21"; //sum(X/rowSums(X))
private static final String TEST_NAME22 = TEST_NAME+"22"; //((7+X)+(X-7)+exp(X))/(rowMins(X)+0.5)
private static final String TEST_NAME23 = TEST_NAME+"23"; //L2SVM outer loop
+ private static final String TEST_NAME24 = TEST_NAME+"24"; //t(X)%*%(w*(X%*%v)), w/ mm
+ private static final String TEST_NAME25 = TEST_NAME+"25"; //-2*(X%*%t(C))+t(rowSums(C^2)), w/ mm
+ private static final String TEST_NAME26 = TEST_NAME+"26"; //t(P)%*%X, w/ mm
+ private static final String TEST_NAME27 = TEST_NAME+"27"; //t(X)%*%(X%*%v), w/ mm
private static final String TEST_DIR = "functions/codegen/";
private static final String TEST_CLASS_DIR = TEST_DIR + RowAggTmplTest.class.getSimpleName() + "/";
@@ -70,7 +74,7 @@ public class RowAggTmplTest extends AutomatedTestBase
@Override
public void setUp() {
TestUtils.clearAssertionInformation();
- for(int i=1; i<=23; i++)
+ for(int i=1; i<=27; i++)
addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) );
}
@@ -419,6 +423,66 @@ public class RowAggTmplTest extends AutomatedTestBase
testCodegenIntegration( TEST_NAME23, false, ExecType.SPARK );
}
+ @Test
+ public void testCodegenRowAggRewrite24CP() {
+ testCodegenIntegration( TEST_NAME24, true, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg24CP() {
+ testCodegenIntegration( TEST_NAME24, false, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg24SP() {
+ testCodegenIntegration( TEST_NAME24, false, ExecType.SPARK );
+ }
+
+ @Test
+ public void testCodegenRowAggRewrite25CP() {
+ testCodegenIntegration( TEST_NAME25, true, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg25CP() {
+ testCodegenIntegration( TEST_NAME25, false, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg25SP() {
+ testCodegenIntegration( TEST_NAME25, false, ExecType.SPARK );
+ }
+
+ @Test
+ public void testCodegenRowAggRewrite26CP() {
+ testCodegenIntegration( TEST_NAME26, true, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg26CP() {
+ testCodegenIntegration( TEST_NAME26, false, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg26SP() {
+ testCodegenIntegration( TEST_NAME26, false, ExecType.SPARK );
+ }
+
+ @Test
+ public void testCodegenRowAggRewrite27CP() {
+ testCodegenIntegration( TEST_NAME27, true, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg27CP() {
+ testCodegenIntegration( TEST_NAME27, false, ExecType.CP );
+ }
+
+ @Test
+ public void testCodegenRowAgg27SP() {
+ testCodegenIntegration( TEST_NAME27, false, ExecType.SPARK );
+ }
+
private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
{
boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern24.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern24.R b/src/test/scripts/functions/codegen/rowAggPattern24.R
new file mode 100644
index 0000000..5510437
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern24.R
@@ -0,0 +1,33 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000)/6000, 600, 10, byrow=TRUE);
+w = matrix(seq(1,2400)/2400, 600, 4, byrow=TRUE);
+v = matrix(seq(1,40)/40, 10, 4, byrow=TRUE);
+
+R = t(X) %*% (w * (X %*% v));
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep=""));
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern24.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern24.dml b/src/test/scripts/functions/codegen/rowAggPattern24.dml
new file mode 100644
index 0000000..200d552
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern24.dml
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(seq(1,6000)/6000, 600, 10);
+w = matrix(seq(1,2400)/2400, 600, 4);
+v = matrix(seq(1,40)/40, 10, 4);
+if(1==1){}
+
+R = t(X) %*% (w * (X %*% v));
+
+write(R, $1)
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern25.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern25.R b/src/test/scripts/functions/codegen/rowAggPattern25.R
new file mode 100644
index 0000000..0e881bc
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern25.R
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000), 600, 10, byrow=TRUE);
+C = matrix(seq(1,40), 4, 10, byrow=TRUE);
+
+R = -2 * (X %*% t(C)) + matrix(1,nrow(X),1) %*% t(rowSums(C^2))
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep=""));
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern25.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern25.dml b/src/test/scripts/functions/codegen/rowAggPattern25.dml
new file mode 100644
index 0000000..fa8775e
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern25.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(seq(1,6000), 600, 10);
+C = matrix(seq(1,40), 4, 10);
+if(1==1){}
+
+R = -2 * (X %*% t(C)) + t(rowSums(C^2))
+
+write(R, $1)
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern26.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern26.R b/src/test/scripts/functions/codegen/rowAggPattern26.R
new file mode 100644
index 0000000..736c376
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern26.R
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000), 600, 10, byrow=TRUE);
+P = matrix(seq(1,3000), 600, 5, byrow=TRUE);
+
+R = t(P) %*% X;
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep=""));
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern26.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern26.dml b/src/test/scripts/functions/codegen/rowAggPattern26.dml
new file mode 100644
index 0000000..f84b556
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern26.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = matrix(seq(1,6000), 600, 10);
+P = matrix(seq(1,3000), 600, 5)
+if(1==1){}
+
+R = t(P) %*% X;
+
+write(R, $1)
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern27.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern27.R b/src/test/scripts/functions/codegen/rowAggPattern27.R
new file mode 100644
index 0000000..4909732
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern27.R
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000)/6000, 600, 10, byrow=TRUE);
+v = matrix(seq(1,40)/40, 10, 4, byrow=TRUE);
+
+R = t(X) %*% (X %*% v);
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep=""));
http://git-wip-us.apache.org/repos/asf/systemml/blob/6b25b3bf/src/test/scripts/functions/codegen/rowAggPattern27.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern27.dml b/src/test/scripts/functions/codegen/rowAggPattern27.dml
new file mode 100644
index 0000000..c5254c2
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern27.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(seq(1,6000)/6000, 600, 10);
+v = matrix(seq(1,40)/40, 10, 4);
+if(1==1){}
+
+R = t(X) %*% (X %*% v);
+
+write(R, $1)