You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/07 03:45:58 UTC

incubator-systemml git commit: [SYSTEMML-1548] Minor performance improvements ultra-sparse matrix read

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 44d7a8857 -> 0f8b19703


[SYSTEMML-1548] Minor performance improvements for ultra-sparse matrix reads

This patch makes the following minor performance improvements for
reading ultra-sparse matrices:

(1) Avoid the unnecessary CSR matrix block conversion before persisting
ultra-sparse matrices at a serialized storage level.

(2) Exploit sparse row scalars when allocating rows whose estimated
number of non-zeros is one.

(3) Fix the allocation of synchronization points for sparse/ultra-sparse
matrix reads (corrupted number of non-zeros).

(4) Avoid row copies on sparse block append if the right-hand-side block
is not in MCSR format (unnecessary temporary allocation).


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0f8b1970
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0f8b1970
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0f8b1970

Branch: refs/heads/master
Commit: 0f8b19703d446ed6f987167975a2deb506e0fd92
Parents: 44d7a88
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat May 6 20:32:59 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat May 6 20:48:09 2017 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/parser/DMLTranslator.java       | 1 -
 .../runtime/instructions/spark/CheckpointSPInstruction.java    | 6 ++++--
 src/main/java/org/apache/sysml/runtime/io/MatrixReader.java    | 5 +++--
 .../java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java | 3 ++-
 .../org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java  | 6 ++++--
 5 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 9f63038..daf00b7 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 
-import org.antlr.v4.parse.ANTLRParser.option_return;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.conf.ConfigurationManager;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
index 1fa30b6..cddfd12 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
@@ -22,6 +22,7 @@ package org.apache.sysml.runtime.instructions.spark;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.Checkpoint;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
@@ -121,10 +122,11 @@ public class CheckpointSPInstruction extends UnarySPInstruction
 					out = ((JavaPairRDD<Long,FrameBlock>)in)
 						.mapValues(new CopyFrameBlockFunction(false));	
 			}
-		
+			
 			//convert mcsr into memory-efficient csr if potentially sparse
 			if( input1.getDataType()==DataType.MATRIX 
-				&& OptimizerUtils.checkSparseBlockCSRConversion(mcIn) ) 
+				&& OptimizerUtils.checkSparseBlockCSRConversion(mcIn)
+				&& !_level.equals(Checkpoint.SER_STORAGE_LEVEL) ) 
 			{				
 				out = ((JavaPairRDD<MatrixIndexes,MatrixBlock>)out)
 					.mapValues(new CreateSparseBlockFunction(SparseBlock.Type.CSR));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
index ffe290e..11e306e 100644
--- a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
+++ b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
@@ -95,7 +95,8 @@ public abstract class MatrixReader
 	 * @throws IOException if IOException occurs
 	 * @throws DMLRuntimeException if DMLRuntimeException occurs
 	 */
-	protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse ) 
+	protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, 
+			int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse ) 
 		throws IOException, DMLRuntimeException
 	{
 		//check input dimension
@@ -116,7 +117,7 @@ public abstract class MatrixReader
 			if( sblock instanceof SparseBlockMCSR && clen > bclen      //multiple col blocks 
 				&& clen > 0 && bclen > 0 && rlen > 0 && brlen > 0 ) {  //all dims known
 				for( int i=0; i<rlen; i+=brlen )
-					ret.getSparseBlock().allocate(i, Math.min((int)(estnnz/rlen),1), (int)clen);
+					sblock.allocate(i, Math.max((int)(estnnz/rlen),1), (int)clen);
 			}
 		}
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 4172e44..ff6a007 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -723,7 +723,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 				int aix = rowoffset+i;
 					
 				//single block append (avoid re-allocations)
-				if( sparseBlock.isEmpty(aix) && coloffset==0 ) { 
+				if( sparseBlock.isEmpty(aix) && coloffset==0
+					&& b instanceof SparseBlockMCSR ) { 
 					sparseBlock.set(aix, b.get(i), deep);
 				}
 				else { //general case

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 9952fab..2c04865 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -134,8 +134,10 @@ public class SparseBlockMCSR extends SparseBlock
 	
 	@Override
 	public void allocate(int r, int ennz, int maxnnz) {
-		if( _rows[r] == null )
-			_rows[r] = new SparseRowVector(ennz, maxnnz);
+		if( _rows[r] == null ) {
+			_rows[r] = (ennz == 1) ? new SparseRowScalar() :
+				new SparseRowVector(ennz, maxnnz);
+		}
 	}
 	
 	@Override