You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/09 05:33:20 UTC
[4/4] systemml git commit: [SYSTEMML-1754] Performance removeEmpty
(shallow copy if unmodified)
[SYSTEMML-1754] Performance removeEmpty (shallow copy if unmodified)
This patch improves the performance and memory efficiency of removeEmpty
rows/columns for both dense and sparse inputs by using a shallow output
copy if the determined output dimensions match the input dimensions.
Basic experiments (cumulative runtime for 100 iterations):
1) removeEmpty rows 10Kx10K, sp=1.0 (dense): 33.9 -> 0.027s
2) removeEmpty rows 100Kx1K, sp=1.0 (dense): 33.2 -> 0.137s
3) removeEmpty cols 10Kx10K, sp=1.0 (dense): 57.8 -> 14.8s
4) removeEmpty cols 1Kx100K, sp=1.0 (dense): 61.5 -> 14.3s
5) removeEmpty rows 10Kx10K, sp=0.1 (sparse): 0.055 -> 0.036s
6) removeEmpty rows 100Kx1K, sp=0.1 (sparse): 0.470 -> 0.127s
7) removeEmpty cols 10Kx10K, sp=0.1 (sparse): 55.3 -> 1.3s
8) removeEmpty cols 1Kx100K, sp=0.1 (sparse): 39.8 -> 1.1s
Note that this patch improves performance by orders of magnitude, except
for sparse removeEmpty rows (which already used shallow row copy).
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b84a4933
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b84a4933
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b84a4933
Branch: refs/heads/master
Commit: b84a4933c11a25d064216bc0606938bcf0d792e6
Parents: 73f7c6a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Jul 8 21:44:51 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 8 22:32:05 2017 -0700
----------------------------------------------------------------------
.../runtime/matrix/data/LibMatrixReorg.java | 103 +++++++++++--------
1 file changed, 61 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/b84a4933/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 73d50eb..205f787 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -1710,7 +1710,14 @@ public class LibMatrixReorg
if( in.isEmptyBlock(false) )
return ret;
- if( in.sparse ) //* <- SPARSE
+ if( SHALLOW_COPY_REORG && m == rlen2 ) {
+ ret.sparse = in.sparse;
+ if( ret.sparse )
+ ret.sparseBlock = in.sparseBlock;
+ else
+ ret.denseBlock = in.denseBlock;
+ }
+ else if( in.sparse ) //* <- SPARSE
{
//note: output dense or sparse
for( int i=0, cix=0; i<m; i++ )
@@ -1798,14 +1805,7 @@ public class LibMatrixReorg
clen2 += flags[j] ? 1 : 0;
}
- //Step 3: create mapping of flags to target indexes
- int[] cix = new int[n];
- for( int j=0, pos=0; j<n; j++ ) {
- if( flags[j] )
- cix[j] = pos++;
- }
-
- //Step 3: reset result and copy cols
+ //Step 3: reset result and copy columns
//dense stays dense if correct input representation (but robust for any input),
// sparse might be dense/sparse
clen2 = Math.max(clen2, 1); //ensure valid output
@@ -1814,42 +1814,61 @@ public class LibMatrixReorg
if( in.isEmptyBlock(false) )
return ret;
- if( in.sparse ) //* <- SPARSE
- {
- //note: output dense or sparse
- SparseBlock a = in.sparseBlock;
-
- for( int i=0; i<m; i++ )
- if ( !a.isEmpty(i) ) {
- int apos = a.pos(i);
- int alen = a.size(i);
- int[] aix = a.indexes(i);
- double[] avals = a.values(i);
- for( int j=apos; j<apos+alen; j++ )
- if( flags[aix[j]] )
- ret.appendValue(i, cix[aix[j]], avals[j]);
- }
- }
- else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE
- {
- ret.allocateDenseBlock();
- double[] a = in.denseBlock;
- double[] c = ret.denseBlock;
-
- for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2)
- for(int j=0; j<n; j++, aix++)
- if( flags[j] )
- c[ lcix+cix[j] ] = a[aix];
+ if( SHALLOW_COPY_REORG && n == clen2 ) {
+ //quick path: shallow copy if unmodified
+ ret.sparse = in.sparse;
+ if( ret.sparse )
+ ret.sparseBlock = in.sparseBlock;
+ else
+ ret.denseBlock = in.denseBlock;
}
- else //SPARSE <- DENSE
+ else
{
- ret.allocateSparseRowsBlock();
- double[] a = in.denseBlock;
+ //create mapping of flags to target indexes
+ int[] cix = new int[n];
+ for( int j=0, pos=0; j<n; j++ ) {
+ if( flags[j] )
+ cix[j] = pos++;
+ }
- for(int i=0, aix=0; i<m; i++)
- for(int j=0; j<n; j++, aix++)
- if( flags[j] && a[aix]!=0 )
- ret.appendValue(i, cix[j], a[aix]);
+ //deep copy of modified outputs
+ if( in.sparse ) //* <- SPARSE
+ {
+ //note: output dense or sparse
+ SparseBlock a = in.sparseBlock;
+
+ for( int i=0; i<m; i++ )
+ if ( !a.isEmpty(i) ) {
+ int apos = a.pos(i);
+ int alen = a.size(i);
+ int[] aix = a.indexes(i);
+ double[] avals = a.values(i);
+ for( int j=apos; j<apos+alen; j++ )
+ if( flags[aix[j]] )
+ ret.appendValue(i, cix[aix[j]], avals[j]);
+ }
+ }
+ else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE
+ {
+ ret.allocateDenseBlock();
+ double[] a = in.denseBlock;
+ double[] c = ret.denseBlock;
+
+ for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2)
+ for(int j=0; j<n; j++, aix++)
+ if( flags[j] )
+ c[ lcix+cix[j] ] = a[aix];
+ }
+ else //SPARSE <- DENSE
+ {
+ ret.allocateSparseRowsBlock();
+ double[] a = in.denseBlock;
+
+ for(int i=0, aix=0; i<m; i++)
+ for(int j=0; j<n; j++, aix++)
+ if( flags[j] && a[aix]!=0 )
+ ret.appendValue(i, cix[j], a[aix]);
+ }
}
//check sparsity