You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/24 20:31:44 UTC

[1/2] incubator-systemml git commit: [SYSTEMML-694] Improved transpose-matmult lop compilation, for lstm

Repository: incubator-systemml
Updated Branches:
  refs/heads/master a5584c0fd -> 3841ca88e


[SYSTEMML-694] Improved transpose-matmult lop compilation, for lstm

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7c5b83c1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7c5b83c1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7c5b83c1

Branch: refs/heads/master
Commit: 7c5b83c1455baaf5e18d37587b1f709af8c1a8c7
Parents: a5584c0
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Jul 23 18:06:51 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jul 23 18:06:51 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/AggBinaryOp.java |  9 ++++++---
 .../java/org/apache/sysml/hops/ReorgOp.java     | 20 ++++++++++++++------
 2 files changed, 20 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c5b83c1/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 0532d01..ea58ebd 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -695,20 +695,23 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
 	{
 		Hop X = getInput().get(0).getInput().get(0); //guaranteed to exists
 		Hop Y = getInput().get(1);
+		int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
 		
 		//right vector transpose
-		Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
+		Lop lY = Y.constructLops();
+		Lop tY = (lY instanceof Transform && ((Transform)lY).getOperationType()==OperationTypes.Transpose ) ?
+				lY.getInputs().get(0) : //if input is already a transpose, avoid redundant transpose ops
+				new Transform(lY, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP, k);
 		tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
 		setLineNumbers(tY);
 		
 		//matrix mult
-		int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
 		Lop mult = new Binary(tY, X.constructLops(), Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.CP, k);	
 		mult.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
 		setLineNumbers(mult);
 		
 		//result transpose (dimensions set outside)
-		Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
+		Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP, k);
 		
 		return out;
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c5b83c1/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index d283d16..5f5138b 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -31,6 +31,7 @@ import org.apache.sysml.lops.LopsException;
 import org.apache.sysml.lops.SortKeys;
 import org.apache.sysml.lops.Transform;
 import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.Transform.OperationTypes;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -131,12 +132,19 @@ public class ReorgOp extends Hop implements MultiThreadedHop
 		{
 			case TRANSPOSE:
 			{
-				int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
-				Transform transform1 = new Transform( getInput().get(0).constructLops(), 
-						HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
-				setOutputDimensions(transform1);
-				setLineNumbers(transform1);
-				setLops(transform1);			
+				Lop lin = getInput().get(0).constructLops();
+				if( lin instanceof Transform && ((Transform)lin).getOperationType()==OperationTypes.Transpose )
+					setLops(lin.getInputs().get(0)); //if input is already a transpose, avoid redundant transpose ops
+				else if( getDim1()==1 && getDim2()==1 )
+					setLops(lin); //if input of size 1x1, avoid unnecessary transpose
+				else { //general case
+					int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
+					Transform transform1 = new Transform( lin, 
+							HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
+					setOutputDimensions(transform1);
+					setLineNumbers(transform1);
+					setLops(transform1);
+				}
 				break;
 			}
 			case DIAG:


[2/2] incubator-systemml git commit: [SYSTEMML-694] Performance dense-sparse vector transpose, for lstm

Posted by mb...@apache.org.
[SYSTEMML-694] Performance dense-sparse vector transpose, for lstm

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3841ca88
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3841ca88
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3841ca88

Branch: refs/heads/master
Commit: 3841ca88e69842ae528b5e329c38b123ddec7ef1
Parents: 7c5b83c
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Jul 23 18:07:56 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jul 23 18:07:56 2016 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 44 ++++++++++++--------
 1 file changed, 26 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3841ca88/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index c5674bb..e472413 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -859,24 +859,32 @@ public class LibMatrixReorg
 		double[] a = in.getDenseBlock();
 		SparseBlock c = out.getSparseBlock();
 		
-		//blocking according to typical L2 cache sizes 
-		final int blocksizeI = 128;
-		final int blocksizeJ = 128; 
-		
-		//blocked execution
-		for( int bi = 0; bi<m; bi+=blocksizeI )
-			for( int bj = 0; bj<n; bj+=blocksizeJ )
-			{
-				int bimin = Math.min(bi+blocksizeI, m);
-				int bjmin = Math.min(bj+blocksizeJ, n);
-				//core transpose operation
-				for( int i=bi; i<bimin; i++ )				
-					for( int j=bj, aix=i*n+bj; j<bjmin; j++, aix++ )
-					{
-						c.allocate(j, ennz2, n2); 
-						c.append(j, i, a[aix]);
-					}
-			}
+		if( out.rlen == 1 ) //VECTOR-VECTOR
+		{	
+			c.allocate(0, (int)in.nonZeros); 
+			c.setIndexRange(0, 0, m, a, 0, m);
+		}
+		else //general case: MATRIX-MATRIX
+		{
+			//blocking according to typical L2 cache sizes 
+			final int blocksizeI = 128;
+			final int blocksizeJ = 128; 
+			
+			//blocked execution
+			for( int bi = 0; bi<m; bi+=blocksizeI )
+				for( int bj = 0; bj<n; bj+=blocksizeJ )
+				{
+					int bimin = Math.min(bi+blocksizeI, m);
+					int bjmin = Math.min(bj+blocksizeJ, n);
+					//core transpose operation
+					for( int i=bi; i<bimin; i++ )				
+						for( int j=bj, aix=i*n+bj; j<bjmin; j++, aix++ )
+						{
+							c.allocate(j, ennz2, n2); 
+							c.append(j, i, a[aix]);
+						}
+				}
+		}
 	}
 	
 	/**