You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2022/02/26 00:29:23 UTC

[systemds] branch main updated: [SYSTEMDS-3298] Performance sparse reshape via more general CSR output

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 95ac7fe  [SYSTEMDS-3298] Performance sparse reshape via more general CSR output
95ac7fe is described below

commit 95ac7fe8f5b72b6433bb73420acd289665af9987
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Feb 26 01:21:16 2022 +0100

    [SYSTEMDS-3298] Performance sparse reshape via more general CSR output
    
    For an NLP embedding use case of encoded abstracts with concatenated
    word embeddings, padded to the maximum abstract length, we have to deal
    with sparse embedding pipelines (transformapply, ctable, ba*+, reshape).
    
    This patch makes some performance improvements to sparse reshape by
    allocating and computing the output directly in a more compact CSR
    representation (a similar specialization existed but only for CSR
    inputs). For 10 iterations of 20K abstracts, max 1000 words/abstract,
    2.5M distinct words, and 300dim word embeddings, this patch improved
    performance as follows: 10x reshape: 45s->32s (99s->82s overall).
---
 .../sysds/runtime/matrix/data/LibMatrixReorg.java  | 47 ++++++++++++++++------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
index a288e9e..6e98a2e 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
@@ -2061,25 +2061,46 @@ public class LibMatrixReorg {
 			}
 			else if( cols%clen==0 //SPECIAL CSR N:1 MATRIX->MATRIX
 				&& SHALLOW_COPY_REORG && SPARSE_OUTPUTS_IN_CSR
-				&& a instanceof SparseBlockCSR ) { //int nnz
-				int[] aix = ((SparseBlockCSR)a).indexes();
+				&& in.nonZeros < Integer.MAX_VALUE ) { //int nnz
 				int n = cols/clen, pos = 0;
 				int[] rptr = new int[rows+1];
 				int[] indexes = new int[(int)a.size()];
+				double[] values = null;
 				rptr[0] = 0;
-				for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) {
-					for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) {
-						if(a.isEmpty(i)) continue;
-						int apos = a.pos(i);
-						int alen = a.size(i);
-						for( int j=apos; j<apos+alen; j++ )
-							indexes[pos++] = cix+aix[j];
+				
+				if(a instanceof SparseBlockCSR) {
+					int[] aix = ((SparseBlockCSR)a).indexes();
+					for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) {
+						for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) {
+							if(a.isEmpty(i)) continue;
+							int apos = a.pos(i);
+							int alen = a.size(i);
+							for( int j=apos; j<apos+alen; j++ )
+								indexes[pos++] = cix+aix[j];
+						}
+						rptr[ci+1] = pos;
+					}
+					//shallow copy of CSR values
+					values = ((SparseBlockCSR)a).values();
+				}
+				else {
+					values = new double[indexes.length];
+					for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) { //output rows
+						for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) { // N input rows
+							if(a.isEmpty(i)) continue;
+							int apos = a.pos(i);
+							int alen = a.size(i);
+							int[] aix = a.indexes(i);
+							System.arraycopy(a.values(i), apos, values, pos, alen);
+							for( int j=apos; j<apos+alen; j++ )
+								indexes[pos++] = cix+aix[j];
+						}
+						rptr[ci+1] = pos;
 					}
-					rptr[ci+1] = pos;
 				}
-				//create CSR block with shallow copy of values
-				out.sparseBlock = new SparseBlockCSR(rptr, indexes,
-					((SparseBlockCSR)a).values(), pos);
+				
+				//create CSR block from constructed or shallow-copy arrays
+				out.sparseBlock = new SparseBlockCSR(rptr, indexes, values, pos);
 			}
 			else if( cols%clen==0 ) { //SPECIAL N:1 MATRIX->MATRIX
 				int n = cols/clen;