You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/05/15 14:08:22 UTC
[systemds] branch master updated: [SYSTEMDS-2975] Fix frame/matrix
casting of large dense blocks
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 5878dad [SYSTEMDS-2975] Fix frame/matrix casting of large dense blocks
5878dad is described below
commit 5878dad6fecda0585eafe094e4b99bc96fe4683b
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat May 15 15:25:24 2021 +0200
[SYSTEMDS-2975] Fix frame/matrix casting of large dense blocks
This patch fixes the missing support for large dense blocks in
matrix-to-frame and frame-to-matrix conversions, specifically for the
special case of homogeneous FP64 schemas. Furthermore, we introduce
minor performance improvements: avoiding unnecessary recomputation of
the number of non-zeros, and using cache blocking better aligned with
the L1 cache (for column-major to row-major copies and vice versa).
Thanks to Lukas for catching this issue.
Co-authored-by: Lukas Erlbacher <lu...@gmail.com>
---
.../apache/sysds/runtime/util/DataConverter.java | 85 +++++++++++++++-------
1 file changed, 60 insertions(+), 25 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
index c181b5c..841c6d7 100644
--- a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
@@ -606,25 +606,43 @@ public class DataConverter {
// special case double schema (without cell-object creation,
// cache-friendly row-column copy)
double[][] a = new double[n][];
- double[] c = mb.getDenseBlockValues();
for( int j=0; j<n; j++ )
a[j] = (double[])frame.getColumnData(j);
- int blocksizeIJ = 16; //blocks of a+overhead/c in L1 cache
- for( int bi=0; bi<m; bi+=blocksizeIJ )
- for( int bj=0; bj<n; bj+=blocksizeIJ ) {
- int bimin = Math.min(bi+blocksizeIJ, m);
- int bjmin = Math.min(bj+blocksizeIJ, n);
- for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
- for( int j=bj; j<bjmin; j++ )
- c[aix+j] = a[j][i];
- }
+ int blocksizeIJ = 32; //blocks of a+overhead/c in L1 cache
+ long lnnz = 0;
+ if( mb.getDenseBlock().isContiguous() ) {
+ double[] c = mb.getDenseBlockValues();
+ for( int bi=0; bi<m; bi+=blocksizeIJ )
+ for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+ int bimin = Math.min(bi+blocksizeIJ, m);
+ int bjmin = Math.min(bj+blocksizeIJ, n);
+ for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
+ for( int j=bj; j<bjmin; j++ )
+ lnnz += (c[aix+j] = a[j][i]) != 0 ? 1 : 0;
+ }
+ }
+ else {
+ DenseBlock c = mb.getDenseBlock();
+ for( int bi=0; bi<m; bi+=blocksizeIJ )
+ for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+ int bimin = Math.min(bi+blocksizeIJ, m);
+ int bjmin = Math.min(bj+blocksizeIJ, n);
+ for( int i=bi; i<bimin; i++ ) {
+ double[] cvals = c.values(i);
+ int cpos = c.pos(i);
+ for( int j=bj; j<bjmin; j++ )
+ lnnz += (cvals[cpos+j] = a[j][i]) != 0 ? 1 : 0;
+ }
+ }
+ }
+ mb.setNonZeros(lnnz);
}
else {
//general case
for( int i=0; i<frame.getNumRows(); i++ )
for( int j=0; j<frame.getNumColumns(); j++ ) {
mb.appendValue(i, j, UtilFunctions.objectToDouble(
- schema[j], frame.get(i, j)));
+ schema[j], frame.get(i, j)));
}
}
@@ -729,7 +747,7 @@ public class DataConverter {
double[] aval = sblock.values(i);
for( int j=apos; j<apos+alen; j++ ) {
row[aix[j]] = UtilFunctions.doubleToObject(
- schema[aix[j]], aval[j]);
+ schema[aix[j]], aval[j]);
}
}
frame.appendRow(row);
@@ -751,18 +769,35 @@ public class DataConverter {
// col pre-allocation, and cache-friendly row-column copy)
int m = mb.getNumRows();
int n = mb.getNumColumns();
- double[] a = mb.getDenseBlockValues();
double[][] c = new double[n][m];
- int blocksizeIJ = 16; //blocks of a/c+overhead in L1 cache
- if( !mb.isEmptyBlock(false) )
- for( int bi=0; bi<m; bi+=blocksizeIJ )
- for( int bj=0; bj<n; bj+=blocksizeIJ ) {
- int bimin = Math.min(bi+blocksizeIJ, m);
- int bjmin = Math.min(bj+blocksizeIJ, n);
- for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
- for( int j=bj; j<bjmin; j++ )
- c[j][i] = a[aix+j];
- }
+ int blocksizeIJ = 32; //blocks of a/c+overhead in L1 cache
+ if( !mb.isEmptyBlock(false) ) {
+ if( mb.getDenseBlock().isContiguous() ) {
+ double[] a = mb.getDenseBlockValues();
+ for( int bi=0; bi<m; bi+=blocksizeIJ )
+ for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+ int bimin = Math.min(bi+blocksizeIJ, m);
+ int bjmin = Math.min(bj+blocksizeIJ, n);
+ for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
+ for( int j=bj; j<bjmin; j++ )
+ c[j][i] = a[aix+j];
+ }
+ }
+ else { // large dense blocks
+ DenseBlock a = mb.getDenseBlock();
+ for( int bi=0; bi<m; bi+=blocksizeIJ )
+ for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+ int bimin = Math.min(bi+blocksizeIJ, m);
+ int bjmin = Math.min(bj+blocksizeIJ, n);
+ for( int i=bi; i<bimin; i++ ) {
+ double[] avals = a.values(i);
+ int apos = a.pos(i);
+ for( int j=bj; j<bjmin; j++ )
+ c[j][i] = avals[apos+j];
+ }
+ }
+ }
+ }
frame.reset();
frame.appendColumns(c);
}
@@ -770,8 +805,8 @@ public class DataConverter {
// general case
for( int i=0; i<mb.getNumRows(); i++ ) {
for( int j=0; j<mb.getNumColumns(); j++ ) {
- row[j] = UtilFunctions.doubleToObject(
- schema[j], mb.quickGetValue(i, j));
+ row[j] = UtilFunctions.doubleToObject(
+ schema[j], mb.quickGetValue(i, j));
}
frame.appendRow(row);
}