You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/08/15 19:04:15 UTC
incubator-systemml git commit: [SYSTEMML-824] Performance
dense-sparse block conversion (row pre-alloc)
Repository: incubator-systemml
Updated Branches:
refs/heads/master e9aa58414 -> 5ac32d6be
[SYSTEMML-824] Performance dense-sparse block conversion (row pre-alloc)
This patch improves the performance of the core matrix block primitive
denseToSparse as triggered in examSparsity(). Similar to sparse
transpose, we now pre-allocate sparse rows with exact nnz information,
in order to avoid repeated reallocations and internal copies. On
In a scenario with a 10k x 10k matrix and sparsity 0.25, the runtime
improved from 1.7s to 490ms.
Furthermore, this patch also includes some minor cleanups (e.g., removal
of unused imports and unused variables).
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5ac32d6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5ac32d6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5ac32d6b
Branch: refs/heads/master
Commit: 5ac32d6be15ce5ab212cc2d040546825195e41e6
Parents: e9aa584
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Aug 14 19:15:11 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Aug 14 19:15:11 2016 -0700
----------------------------------------------------------------------
.../org/apache/sysml/hops/ConvolutionOp.java | 3 --
.../runtime/matrix/data/LibMatrixCUDA.java | 1 -
.../sysml/runtime/matrix/data/MatrixBlock.java | 33 ++++++++++++++------
3 files changed, 23 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index 8c38a48..c010de6 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -31,7 +31,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
public class ConvolutionOp extends Hop implements MultiThreadedHop
@@ -231,9 +230,7 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
// [numRows, numCols, NNZ]
long[] ret = null;
- Hop input1 = getInput().get(0);
ConvolutionParameters params;
- MatrixCharacteristics mc = memo.getAllInputStats(input1);
try {
params = parseInput();
} catch (DMLRuntimeException e) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 52272a0..6a25b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -49,7 +49,6 @@ import static jcuda.runtime.JCuda.cudaFree;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.jcublas.JCublas;
-import jcuda.jcublas.JCublas2;
import jcuda.jcublas.cublasHandle;
import jcuda.jcudnn.cudnnConvolutionDescriptor;
import jcuda.jcudnn.cudnnFilterDescriptor;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 1316ad8..8bd7b79 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1203,20 +1203,33 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
allocateSparseRowsBlock();
reset();
- //copy dense to sparse
+ //copy dense to sparse with (1) row pre-allocation to avoid repeated
+ //allocation on append, and (2) nnz re-computation
double[] a = denseBlock;
SparseBlock c = sparseBlock;
+ final int m = rlen;
+ final int n = clen;
- for( int i=0, aix=0; i<rlen; i++ )
- for(int j=0; j<clen; j++, aix++)
- if( a[aix] != 0 ) {
- //create sparse row only if required
- c.allocate(i, estimatedNNzsPerRow, clen);
- c.append(i, j, a[aix]);
- nonZeros++;
- }
+ long nnz = 0;
+ for( int i=0, aix=0; i<m; i++, aix+=n ) {
+ //recompute nnz per row (not via recomputeNonZeros as sparse allocated)
+ int lnnz = 0;
+ for(int j=0; j<n; j++)
+ lnnz += (a[aix+j]!=0) ? 1 : 0;
+ if( lnnz <= 0 ) continue;
+
+ //allocate sparse row and append non-zero values
+ c.allocate(i, lnnz);
+ for(int j=0; j<n; j++) {
+ double val = a[aix+j];
+ if( val != 0 )
+ c.append(i, j, val);
+ }
+ nnz += lnnz;
+ }
- //cleanup dense block
+ //update nnz and cleanup dense block
+ nonZeros = nnz;
denseBlock = null;
}