You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/05/15 14:08:22 UTC

[systemds] branch master updated: [SYSTEMDS-2975] Fix frame/matrix casting of large dense blocks

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 5878dad  [SYSTEMDS-2975] Fix frame/matrix casting of large dense blocks
5878dad is described below

commit 5878dad6fecda0585eafe094e4b99bc96fe4683b
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat May 15 15:25:24 2021 +0200

    [SYSTEMDS-2975] Fix frame/matrix casting of large dense blocks
    
    This patch fixes the missing support for large dense blocks in
    matrix-to-frame and frame-to-matrix conversions, specifically for the
    special case of homogeneous FP64 schemas. Furthermore, we introduce
    minor performance improvements that avoid unnecessary recomputation of
    the number of non-zeros and use cache blocking that is better aligned
    with the L1 cache (for column-major to row-major copies and vice versa).
    
    Thanks to Lukas for catching this issue.
    
    Co-authored-by: Lukas Erlbacher <lu...@gmail.com>
---
 .../apache/sysds/runtime/util/DataConverter.java   | 85 +++++++++++++++-------
 1 file changed, 60 insertions(+), 25 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
index c181b5c..841c6d7 100644
--- a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
@@ -606,25 +606,43 @@ public class DataConverter {
 			// special case double schema (without cell-object creation, 
 			// cache-friendly row-column copy)
 			double[][] a = new double[n][];
-			double[] c = mb.getDenseBlockValues();
 			for( int j=0; j<n; j++ )
 				a[j] = (double[])frame.getColumnData(j);
-			int blocksizeIJ = 16; //blocks of a+overhead/c in L1 cache
-			for( int bi=0; bi<m; bi+=blocksizeIJ )
-				for( int bj=0; bj<n; bj+=blocksizeIJ ) {
-					int bimin = Math.min(bi+blocksizeIJ, m);
-					int bjmin = Math.min(bj+blocksizeIJ, n);
-					for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
-						for( int j=bj; j<bjmin; j++ )
-							c[aix+j] = a[j][i];
-				}
+			int blocksizeIJ = 32; //blocks of a+overhead/c in L1 cache
+			long lnnz = 0;
+			if( mb.getDenseBlock().isContiguous() ) {
+				double[] c = mb.getDenseBlockValues();
+				for( int bi=0; bi<m; bi+=blocksizeIJ )
+					for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+						int bimin = Math.min(bi+blocksizeIJ, m);
+						int bjmin = Math.min(bj+blocksizeIJ, n);
+						for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
+							for( int j=bj; j<bjmin; j++ )
+								lnnz += (c[aix+j] = a[j][i]) != 0 ? 1 : 0;
+					}
+			}
+			else {
+				DenseBlock c = mb.getDenseBlock();
+				for( int bi=0; bi<m; bi+=blocksizeIJ )
+					for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+						int bimin = Math.min(bi+blocksizeIJ, m);
+						int bjmin = Math.min(bj+blocksizeIJ, n);
+						for( int i=bi; i<bimin; i++ ) {
+							double[] cvals = c.values(i);
+							int cpos = c.pos(i);
+							for( int j=bj; j<bjmin; j++ )
+								lnnz += (cvals[cpos+j] = a[j][i]) != 0 ? 1 : 0;
+						}
+					}
+			}
+			mb.setNonZeros(lnnz);
 		}
 		else { 
 			//general case
 			for( int i=0; i<frame.getNumRows(); i++ ) 
 				for( int j=0; j<frame.getNumColumns(); j++ ) {
 					mb.appendValue(i, j, UtilFunctions.objectToDouble(
-							schema[j], frame.get(i, j)));
+						schema[j], frame.get(i, j)));
 				}
 		}
 		
@@ -729,7 +747,7 @@ public class DataConverter {
 					double[] aval = sblock.values(i);
 					for( int j=apos; j<apos+alen; j++ ) {
 						row[aix[j]] = UtilFunctions.doubleToObject(
-								schema[aix[j]], aval[j]);
+							schema[aix[j]], aval[j]);
 					}
 				}
 				frame.appendRow(row);
@@ -751,18 +769,35 @@ public class DataConverter {
 				// col pre-allocation, and cache-friendly row-column copy)
 				int m = mb.getNumRows();
 				int n = mb.getNumColumns();
-				double[] a = mb.getDenseBlockValues();
 				double[][] c = new double[n][m];
-				int blocksizeIJ = 16; //blocks of a/c+overhead in L1 cache
-				if( !mb.isEmptyBlock(false) )
-					for( int bi=0; bi<m; bi+=blocksizeIJ )
-						for( int bj=0; bj<n; bj+=blocksizeIJ ) {
-							int bimin = Math.min(bi+blocksizeIJ, m);
-							int bjmin = Math.min(bj+blocksizeIJ, n);
-							for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
-								for( int j=bj; j<bjmin; j++ )
-									c[j][i] = a[aix+j];
-						}
+				int blocksizeIJ = 32; //blocks of a/c+overhead in L1 cache
+				if( !mb.isEmptyBlock(false) ) {
+					if( mb.getDenseBlock().isContiguous() ) {
+						double[] a = mb.getDenseBlockValues();
+						for( int bi=0; bi<m; bi+=blocksizeIJ )
+							for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+								int bimin = Math.min(bi+blocksizeIJ, m);
+								int bjmin = Math.min(bj+blocksizeIJ, n);
+								for( int i=bi, aix=bi*n; i<bimin; i++, aix+=n )
+									for( int j=bj; j<bjmin; j++ )
+										c[j][i] = a[aix+j];
+							}
+					}
+					else { // large dense blocks
+						DenseBlock a = mb.getDenseBlock();
+						for( int bi=0; bi<m; bi+=blocksizeIJ )
+							for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+								int bimin = Math.min(bi+blocksizeIJ, m);
+								int bjmin = Math.min(bj+blocksizeIJ, n);
+								for( int i=bi; i<bimin; i++ ) {
+									double[] avals = a.values(i);
+									int apos = a.pos(i);
+									for( int j=bj; j<bjmin; j++ )
+										c[j][i] = avals[apos+j];
+								}
+							}
+					}
+				}
 				frame.reset();
 				frame.appendColumns(c);
 			}
@@ -770,8 +805,8 @@ public class DataConverter {
 				// general case
 				for( int i=0; i<mb.getNumRows(); i++ ) {
 					for( int j=0; j<mb.getNumColumns(); j++ ) {
-							row[j] = UtilFunctions.doubleToObject(
-									schema[j], mb.quickGetValue(i, j));
+						row[j] = UtilFunctions.doubleToObject(
+							schema[j], mb.quickGetValue(i, j));
 					}
 					frame.appendRow(row);
 				}