You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/07 03:45:58 UTC
incubator-systemml git commit: [SYSTEMML-1548] Minor performance
improvements for ultra-sparse matrix reads
Repository: incubator-systemml
Updated Branches:
refs/heads/master 44d7a8857 -> 0f8b19703
[SYSTEMML-1548] Minor performance improvements for ultra-sparse matrix reads
This patch makes the following minor performance improvements for
reading ultra-sparse matrices:
(1) Avoid the unnecessary conversion of matrix blocks to CSR before
persisting ultra-sparse matrices at a serialized storage level.
(2) Exploit sparse row scalars in the context of row allocation with an
estimated number of non-zeros.
(3) Fix the allocation of synchronization points for sparse/ultra-sparse
matrix reads (previously resulting in a corrupted number of non-zeros).
(4) Avoid row copies on sparse block append if the right-hand-side block
is not in MCSR format (unnecessary temporary allocation).
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0f8b1970
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0f8b1970
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0f8b1970
Branch: refs/heads/master
Commit: 0f8b19703d446ed6f987167975a2deb506e0fd92
Parents: 44d7a88
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat May 6 20:32:59 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat May 6 20:48:09 2017 -0700
----------------------------------------------------------------------
src/main/java/org/apache/sysml/parser/DMLTranslator.java | 1 -
.../runtime/instructions/spark/CheckpointSPInstruction.java | 6 ++++--
src/main/java/org/apache/sysml/runtime/io/MatrixReader.java | 5 +++--
.../java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java | 3 ++-
.../org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java | 6 ++++--
5 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 9f63038..daf00b7 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
-import org.antlr.v4.parse.ANTLRParser.option_return;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.conf.ConfigurationManager;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
index 1fa30b6..cddfd12 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
@@ -22,6 +22,7 @@ package org.apache.sysml.runtime.instructions.spark;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.storage.StorageLevel;
import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.Checkpoint;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
@@ -121,10 +122,11 @@ public class CheckpointSPInstruction extends UnarySPInstruction
out = ((JavaPairRDD<Long,FrameBlock>)in)
.mapValues(new CopyFrameBlockFunction(false));
}
-
+
//convert mcsr into memory-efficient csr if potentially sparse
if( input1.getDataType()==DataType.MATRIX
- && OptimizerUtils.checkSparseBlockCSRConversion(mcIn) )
+ && OptimizerUtils.checkSparseBlockCSRConversion(mcIn)
+ && !_level.equals(Checkpoint.SER_STORAGE_LEVEL) )
{
out = ((JavaPairRDD<MatrixIndexes,MatrixBlock>)out)
.mapValues(new CreateSparseBlockFunction(SparseBlock.Type.CSR));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
index ffe290e..11e306e 100644
--- a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
+++ b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
@@ -95,7 +95,8 @@ public abstract class MatrixReader
* @throws IOException if IOException occurs
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
- protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse )
+ protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen,
+ int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse )
throws IOException, DMLRuntimeException
{
//check input dimension
@@ -116,7 +117,7 @@ public abstract class MatrixReader
if( sblock instanceof SparseBlockMCSR && clen > bclen //multiple col blocks
&& clen > 0 && bclen > 0 && rlen > 0 && brlen > 0 ) { //all dims known
for( int i=0; i<rlen; i+=brlen )
- ret.getSparseBlock().allocate(i, Math.min((int)(estnnz/rlen),1), (int)clen);
+ sblock.allocate(i, Math.max((int)(estnnz/rlen),1), (int)clen);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 4172e44..ff6a007 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -723,7 +723,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
int aix = rowoffset+i;
//single block append (avoid re-allocations)
- if( sparseBlock.isEmpty(aix) && coloffset==0 ) {
+ if( sparseBlock.isEmpty(aix) && coloffset==0
+ && b instanceof SparseBlockMCSR ) {
sparseBlock.set(aix, b.get(i), deep);
}
else { //general case
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 9952fab..2c04865 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -134,8 +134,10 @@ public class SparseBlockMCSR extends SparseBlock
@Override
public void allocate(int r, int ennz, int maxnnz) {
- if( _rows[r] == null )
- _rows[r] = new SparseRowVector(ennz, maxnnz);
+ if( _rows[r] == null ) {
+ _rows[r] = (ennz == 1) ? new SparseRowScalar() :
+ new SparseRowVector(ennz, maxnnz);
+ }
}
@Override