You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/02/29 21:34:57 UTC

incubator-systemml git commit: [SYSTEMML-383] Performance Spark CSR conversion (avoid redundant copies)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master a21d8c6ed -> 8eee978ed


[SYSTEMML-383] Performance Spark CSR conversion (avoid redundant copies)

There were two sources of redundant deep copies of sparse matrix blocks:
(1) before CSR conversion, we unnecessarily created an MCSR deep copy on
read from binary inputs, and (2) for checkpoints following a read, we
created an unnecessary CSR copy even if the block was already in CSR.
This patch avoids these redundant deep copies by (1) performing the CSR
conversion in place of the required deep copy, and (2) using shallow
copies if blocks are already in CSR format.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8eee978e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8eee978e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8eee978e

Branch: refs/heads/master
Commit: 8eee978ed4fd01ec085d6526ebdcf102271a356b
Parents: a21d8c6
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Feb 28 19:17:56 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Mon Feb 29 12:33:14 2016 -0800

----------------------------------------------------------------------
 .../spark/functions/CopyBlockPairFunction.java           | 11 +++++++++--
 .../spark/functions/CreateSparseBlockFunction.java       |  4 +++-
 .../apache/sysml/runtime/matrix/data/MatrixBlock.java    |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
index d23dcfc..301ca4d 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
@@ -22,8 +22,10 @@ import org.apache.spark.api.java.function.PairFunction;
 
 import scala.Tuple2;
 
+import org.apache.sysml.lops.Checkpoint;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
 
 /**
  * General purpose copy function for binary block rdds. This function can be used in
@@ -51,8 +53,13 @@ public class CopyBlockPairFunction implements PairFunction<Tuple2<MatrixIndexes,
 	{	
 		if( _deepCopy ) {
 			MatrixIndexes ix = new MatrixIndexes(arg0._1());
-			MatrixBlock block = new MatrixBlock();
-			block.copy(arg0._2());
+			MatrixBlock block = null;
+			//always create deep copies in more memory-efficient CSR representation 
+			//if block is already in sparse format			
+			if( Checkpoint.CHECKPOINT_SPARSE_CSR && arg0._2.isInSparseFormat() )
+				block = new MatrixBlock(arg0._2, SparseBlock.Type.CSR, true);
+			else
+				block = new MatrixBlock(arg0._2());
 			return new Tuple2<MatrixIndexes,MatrixBlock>(ix,block);
 		}
 		else {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
index 51f3217..7cf6e8c 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
@@ -42,8 +42,10 @@ public class CreateSparseBlockFunction implements Function<MatrixBlock,MatrixBlo
 	public MatrixBlock call(MatrixBlock arg0)
 		throws Exception 
 	{
+		//convert given block to CSR representation if in sparse format
+		//but allow shallow pass-through if already in CSR representation. 
 		if( arg0.isInSparseFormat() )
-			return new MatrixBlock(arg0, _stype);
+			return new MatrixBlock(arg0, _stype, false);
 		else //pass through dense
 			return arg0;	
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8f47bc3..ddafe99 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -175,7 +175,7 @@ public class MatrixBlock extends MatrixValue implements Externalizable
 		this.copy(that);
 	}
 	
-	public MatrixBlock(MatrixBlock that, SparseBlock.Type stype)
+	public MatrixBlock(MatrixBlock that, SparseBlock.Type stype, boolean deep)
 	{
 		//sanity check sparse matrix block
 		if( !that.isInSparseFormat() )
@@ -188,7 +188,7 @@ public class MatrixBlock extends MatrixValue implements Externalizable
 		nonZeros = that.nonZeros;
 		estimatedNNzsPerRow = that.estimatedNNzsPerRow;
 		sparseBlock = SparseBlockFactory
-				.copySparseBlock(stype, that.sparseBlock, true);
+				.copySparseBlock(stype, that.sparseBlock, deep);
 	}
 	
 	////////