You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2022/02/26 00:29:23 UTC
[systemds] branch main updated: [SYSTEMDS-3298] Performance sparse reshape via more general CSR output
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 95ac7fe [SYSTEMDS-3298] Performance sparse reshape via more general CSR output
95ac7fe is described below
commit 95ac7fe8f5b72b6433bb73420acd289665af9987
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Feb 26 01:21:16 2022 +0100
[SYSTEMDS-3298] Performance sparse reshape via more general CSR output
For an NLP embedding use case of encoded abstracts with concatenated
word embeddings, padded to the maximum abstract length, we have to deal
with sparse embedding pipelines (transformapply, ctable, ba*+, reshape).
This patch makes some performance improvements to sparse reshape by
allocating and computing the output directly in a more compact CSR
representation (a similar specialization existed but only for CSR
inputs). For 10 iterations of 20K abstracts, max 1000 words/abstract,
2.5M distinct words, and 300dim word embeddings, this patch improved
performance as follows: 10x reshape: 45s->32s (99s->82s overall).
---
.../sysds/runtime/matrix/data/LibMatrixReorg.java | 47 ++++++++++++++++------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
index a288e9e..6e98a2e 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
@@ -2061,25 +2061,46 @@ public class LibMatrixReorg {
}
else if( cols%clen==0 //SPECIAL CSR N:1 MATRIX->MATRIX
&& SHALLOW_COPY_REORG && SPARSE_OUTPUTS_IN_CSR
- && a instanceof SparseBlockCSR ) { //int nnz
- int[] aix = ((SparseBlockCSR)a).indexes();
+ && in.nonZeros < Integer.MAX_VALUE ) { //int nnz
int n = cols/clen, pos = 0;
int[] rptr = new int[rows+1];
int[] indexes = new int[(int)a.size()];
+ double[] values = null;
rptr[0] = 0;
- for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) {
- for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) {
- if(a.isEmpty(i)) continue;
- int apos = a.pos(i);
- int alen = a.size(i);
- for( int j=apos; j<apos+alen; j++ )
- indexes[pos++] = cix+aix[j];
+
+ if(a instanceof SparseBlockCSR) {
+ int[] aix = ((SparseBlockCSR)a).indexes();
+ for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) {
+ for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) {
+ if(a.isEmpty(i)) continue;
+ int apos = a.pos(i);
+ int alen = a.size(i);
+ for( int j=apos; j<apos+alen; j++ )
+ indexes[pos++] = cix+aix[j];
+ }
+ rptr[ci+1] = pos;
+ }
+ //shallow copy of CSR values
+ values = ((SparseBlockCSR)a).values();
+ }
+ else {
+ values = new double[indexes.length];
+ for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) { //output rows
+ for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) { // N input rows
+ if(a.isEmpty(i)) continue;
+ int apos = a.pos(i);
+ int alen = a.size(i);
+ int[] aix = a.indexes(i);
+ System.arraycopy(a.values(i), apos, values, pos, alen);
+ for( int j=apos; j<apos+alen; j++ )
+ indexes[pos++] = cix+aix[j];
+ }
+ rptr[ci+1] = pos;
}
- rptr[ci+1] = pos;
}
- //create CSR block with shallow copy of values
- out.sparseBlock = new SparseBlockCSR(rptr, indexes,
- ((SparseBlockCSR)a).values(), pos);
+
+ //create CSR block from constructed or shallow-copy arrays
+ out.sparseBlock = new SparseBlockCSR(rptr, indexes, values, pos);
}
else if( cols%clen==0 ) { //SPECIAL N:1 MATRIX->MATRIX
int n = cols/clen;