You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/10 07:02:36 UTC

[3/3] incubator-systemml git commit: [SYSTEMML-1140] Performance conv2d_bias_add (cache-conscious transpose)

[SYSTEMML-1140] Performance conv2d_bias_add (cache-conscious transpose)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/de1e119d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/de1e119d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/de1e119d

Branch: refs/heads/master
Commit: de1e119de0b2fc2a6c6a2c57bf64c4172a26890d
Parents: d0b23d6
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Feb 10 06:58:34 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 10 07:55:53 2017 +0100

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 66 +++++++++-----------
 1 file changed, 31 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/de1e119d/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 29e59bd..82b0a61 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -233,39 +233,39 @@ public class LibMatrixDNN {
 	}
 	
 	/**
-	 * Performs the operation: ret += t(elem)
+	 * Performs the operation for(e : elem) ret += t(e) in a cache-conscious manner
+	 * by sequentially aggregating for(e : elem) tmp += e (in-place into elem[0], which
+	 * is therefore modified) and finally transposing ret = t(tmp).
+	 * 
 	 * @param ret left and output matrix
-	 * @param elem right untransposed matrix
+	 * @param elem array of right untransposed matrices (expected in dense format)
 	 * @param params convolution parameters
-	 * @throws DMLRuntimeException if DMLRuntimeException occurs
+	 * @throws DMLRuntimeException in case of unsupported inputs or output
 	 */
-	private static void elementWiseInPlaceTransposedAddition(MatrixBlock ret, MatrixBlock elem) throws DMLRuntimeException {
-		if(ret.getNumRows() != elem.getNumColumns() || ret.getNumColumns() != elem.getNumRows()) {
-			throw new DMLRuntimeException("Incorrect dimensions");
-		}
-		int numRow = ret.getNumColumns();
-		if(!ret.isInSparseFormat() && !elem.isInSparseFormat()) {
-			int iter = 0;
-			for(int i = 0; i < elem.getNumRows(); i++) {
-				for(int j = 0; j < elem.getNumColumns(); j++, iter++) {
-					int index = j*numRow+i;
-					ret.denseBlock[index] += elem.denseBlock[iter];
-				}
-			}
-		}
-		else if(!ret.isInSparseFormat() && elem.isInSparseFormat()) {
-			if(!elem.isEmptyBlock()) {
-				Iterator<IJV> iter = elem.sparseBlock.getIterator();
-				while(iter.hasNext()) {
-					IJV ijv = iter.next();
-					int index = ijv.getJ()*numRow + ijv.getI();
-					ret.denseBlock[index] += ijv.getV(); 
-				}
-			}
-		}
-		else {
-			throw new DMLRuntimeException("Sparse return format not supported");
+	private static void elementWiseInPlaceTransposedAddition(MatrixBlock ret, MatrixBlock[] elem) 
+		throws DMLRuntimeException 
+	{
+		//sanity checks: non-empty and dense inputs, dense output (note: dimensions of elem blocks vs ret are not validated here)
+		if( elem == null || elem.length==0 )
+			throw new DMLRuntimeException("Empty input not supported.");
+		for( MatrixBlock e : elem )
+			if( e.isInSparseFormat() )
+				throw new DMLRuntimeException("Sparse input format not supported.");
+		if( ret.isInSparseFormat() )
+			throw new DMLRuntimeException("Sparse output format not supported.");
+				
+		//Step 1: aggregate partial blocks without transpose; elem[1..n-1] are summed into the dense array of elem[0], i.e., elem[0] is modified in place
+		MatrixBlock tmpAgg = elem[0]; //no copy: tmpAgg aliases the first input block
+		double[] tmp = tmpAgg.denseBlock; //NOTE(review): assumes the dense array is allocated (non-null) -- confirm for empty blocks
+		for( int k=1; k<elem.length; k++ ) {
+			double[] tmp2 = elem[k].denseBlock; //NOTE(review): assumed non-null and at least tmp.length long -- not checked here
+			for( int i=0; i<tmp.length; i++ )
+				tmp[i] += tmp2[i];
 		}
+		
+		//Step 2: cache-conscious transpose of the aggregate to the output
+		tmpAgg.setNonZeros(-1); //avoid early abort: nnz of elem[0] is stale after aggregation; -1 presumably marks it as unknown (verify)
+		LibMatrixReorg.transpose(tmpAgg, ret);
 	}
 	
 	@SuppressWarnings("unused")
@@ -948,9 +948,7 @@ public class LibMatrixDNN {
 				for( Future<Long> task : taskret )
 					params.output.nonZeros += task.get();
 				if(type == TaskType.LoopedIm2ColConv2dBwdFilter) {
-					for(MatrixBlock partialRetBlock : partialRetBlocks) {
-						elementWiseInPlaceTransposedAddition(params.output, partialRetBlock);
-					}
+					elementWiseInPlaceTransposedAddition(params.output, partialRetBlocks.toArray(new MatrixBlock[0]));
 				}
 			} 
 			catch (Exception e) {
@@ -965,9 +963,7 @@ public class LibMatrixDNN {
 						doutReshapedBlocks, partialRetBlocks).call());
 				
 				if(type == TaskType.LoopedIm2ColConv2dBwdFilter) {
-					for(MatrixBlock partialRetBlock : partialRetBlocks) {
-						elementWiseInPlaceTransposedAddition(params.output, partialRetBlock);
-					}
+					elementWiseInPlaceTransposedAddition(params.output, partialRetBlocks.toArray(new MatrixBlock[0]));
 				}
 			} catch (Exception e) {
 				throw new DMLRuntimeException("Error while executing single-threaded " + type.name(), e);