You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/22 07:56:06 UTC

systemml git commit: [SYSTEMML-1906] Tuning codegen row ops over compressed matrices, part2

Repository: systemml
Updated Branches:
  refs/heads/master 9a286a213 -> 900d8c926


[SYSTEMML-1906] Tuning codegen row ops over compressed matrices, part2

Despite multiple rounds of improvements, the row-wise codegen template
over compressed matrices still shows inferior performance compared to
hand-coded CLA operations. Hence, this patch makes some additional
improvements (pull common code from the group iterators into the row
iterator, make the group iterators more compact, and avoid unnecessary
L1d cache loads). Over Airline78 and Mnist8m, this patch improved
performance from 735ms to 704ms and from 15.2s to 13.2s, respectively.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/900d8c92
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/900d8c92
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/900d8c92

Branch: refs/heads/master
Commit: 900d8c9260c7654ff855d63d957bbae3d2730100
Parents: 9a286a2
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Sep 22 00:35:12 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Sep 22 00:55:59 2017 -0700

----------------------------------------------------------------------
 .../apache/sysml/runtime/compress/ColGroup.java |  3 +-
 .../sysml/runtime/compress/ColGroupDDC.java     | 17 ++------
 .../sysml/runtime/compress/ColGroupOLE.java     | 34 ++++++----------
 .../sysml/runtime/compress/ColGroupRLE.java     | 41 +++++++-------------
 .../runtime/compress/ColGroupUncompressed.java  | 27 ++++---------
 .../runtime/compress/CompressedMatrixBlock.java | 12 +++++-
 6 files changed, 48 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index d6187e9..5097aec 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -298,7 +298,6 @@ public abstract class ColGroup implements Serializable
 	 * avoid unnecessary value copies per group.
 	 */
 	protected abstract class ColGroupRowIterator {
-		public abstract boolean hasNext();
-		public abstract void next(double[] buff);
+		public abstract void next(double[] buff, int rowIx, int segIx, boolean last);
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
index 492f0b0..d261f96 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
@@ -307,28 +307,17 @@ public abstract class ColGroupDDC extends ColGroupValue
 	
 	private class DDCRowIterator extends ColGroupRowIterator
 	{
-		private final int _ru;
-		private int _rpos = -1;
-		
 		public DDCRowIterator(int rl, int ru) {
-			_ru = ru;
-			_rpos = rl;
-		}
-
-		@Override
-		public boolean hasNext() {
-			return (_rpos < _ru);
+			//do nothing
 		}
 
 		@Override
-		public void next(double[] buff) {
+		public void next(double[] buff, int rowIx, int segIx, boolean last) {
 			//copy entire value tuple to output row
 			final int clen = getNumCols();
-			final int off = getCode(_rpos)*clen;
+			final int off = getCode(rowIx)*clen;
 			for(int j=0; j<clen; j++)
 				buff[_colIndexes[j]] = _values[off+j];
-			//advance position to next row
-			_rpos++;
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
index 6a4d3e8..540739a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -856,16 +856,10 @@ public class ColGroupOLE extends ColGroupOffset
 	
 	private class OLERowIterator extends ColGroupRowIterator
 	{
-		//iterator configuration 
-		private final int _ru;
-		//iterator state
 		private final int[] _apos;
 		private final int[] _vcodes;
-		private int _rpos = -1;
 		
 		public OLERowIterator(int rl, int ru) {
-			_ru = ru;
-			_rpos = rl;
 			_apos = skipScan(getNumValues(), rl);
 			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru-rl)];
 			Arrays.fill(_vcodes, -1); //initial reset
@@ -873,27 +867,21 @@ public class ColGroupOLE extends ColGroupOffset
 		}
 		
 		@Override
-		public boolean hasNext() {
-			return (_rpos < _ru);
-		}
-		
-		@Override
-		public void next(double[] buff) {
-			//copy entire value tuple or reset to zero
-			int ix = _rpos%BitmapEncoder.BITMAP_BLOCK_SZ;
-			final int clen = getNumCols();
-			for(int j=0, off=_vcodes[ix]*clen; j<clen; j++)
-				if( _vcodes[ix] >= 0 )
+		public void next(double[] buff, int rowIx, int segIx, boolean last) {
+			final int clen = _colIndexes.length;
+			final int vcode = _vcodes[segIx];
+			if( vcode >= 0 ) {
+				//copy entire value tuple if necessary
+				for(int j=0, off=vcode*clen; j<clen; j++)
 					buff[_colIndexes[j]] = _values[off+j];
-			//reset vcode to avoid scan on next segment
-			_vcodes[ix] = -1;
-			//advance position to next row
-			_rpos++;
-			if( _rpos%BitmapEncoder.BITMAP_BLOCK_SZ==0 && _rpos<_ru )
+				//reset vcode to avoid scan on next segment
+				_vcodes[segIx] = -1;
+			}
+			if( segIx+1==BitmapEncoder.BITMAP_BLOCK_SZ && !last )
 				getNextSegment();
 		}
 		
-		public void getNextSegment() {
+		private void getNextSegment() {
 			//materialize value codes for entire segment in a 
 			//single pass over all values (store value code by pos)
 			final int numVals = getNumValues();

http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
index 9b0bfc4..6440c5c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
@@ -857,46 +857,35 @@ public class ColGroupRLE extends ColGroupOffset
 	
 	private class RLERowIterator extends ColGroupRowIterator
 	{
-		//iterator configuration 
-		private final int _ru;
 		//iterator state
 		private final int[] _astart;
 		private final int[] _apos;
 		private final int[] _vcodes;
-		private int _rpos = -1;
 		
 		public RLERowIterator(int rl, int ru) {
-			_ru = ru;
-			_rpos = rl;
 			_astart = new int[getNumValues()];
 			_apos = skipScan(getNumValues(), rl, _astart);
 			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru-rl)];
 			Arrays.fill(_vcodes, -1); //initial reset
-			getNextSegment();
-		}
-		
-		@Override
-		public boolean hasNext() {
-			return (_rpos < _ru);
+			getNextSegment(rl);
 		}
 		
 		@Override
-		public void next(double[] buff) {
-			//copy entire value tuple or reset to zero
-			int ix = _rpos%BitmapEncoder.BITMAP_BLOCK_SZ;
+		public void next(double[] buff, int rowIx, int segIx, boolean last) {
 			final int clen = getNumCols();
-			for(int j=0, off=_vcodes[ix]*clen; j<clen; j++)
-				if( _vcodes[ix] >= 0 )
+			final int vcode = _vcodes[segIx];
+			if( vcode >= 0 ) {
+				//copy entire value tuple if necessary
+				for(int j=0, off=vcode*clen; j<clen; j++)
 					buff[_colIndexes[j]] = _values[off+j];
-			//reset vcode to avoid scan on next segment
-			_vcodes[ix] = -1;
-			//advance position to next row
-			_rpos++;
-			if( _rpos%BitmapEncoder.BITMAP_BLOCK_SZ==0 && _rpos<_ru )
-				getNextSegment();
+				//reset vcode to avoid scan on next segment
+				_vcodes[segIx] = -1;
+			}
+			if( segIx+1==BitmapEncoder.BITMAP_BLOCK_SZ && !last )
+				getNextSegment(rowIx+1);
 		}
 		
-		public void getNextSegment() {
+		private void getNextSegment(int rowIx) {
 			//materialize value codes for entire segment in a 
 			//single pass over all values (store value code by pos)
 			final int numVals = getNumValues();
@@ -906,13 +895,13 @@ public class ColGroupRLE extends ColGroupOffset
 				int blen = len(k);
 				int bix = _apos[k];
 				int start = _astart[k];
-				int end = (_rpos/blksz+1)*blksz;
+				int end = (rowIx/blksz+1)*blksz;
 				while( bix < blen && start < end ) {
 					int lstart = _data[boff + bix];
 					int llen = _data[boff + bix + 1];
 					//set codes of entire run, with awareness of unaligned runs/segments
-					Arrays.fill(_vcodes, Math.min(Math.max(_rpos, start+lstart), end)-_rpos, 
-						Math.min(start+lstart+llen,end)-_rpos, k);
+					Arrays.fill(_vcodes, Math.min(Math.max(rowIx, start+lstart), end)-rowIx, 
+						Math.min(start+lstart+llen,end)-rowIx, k);
 					if( start+lstart+llen >= end )
 						break;
 					start += lstart + llen;

http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index 8f85574..c219ad6 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -469,30 +469,21 @@ public class ColGroupUncompressed extends ColGroup
 	
 	private class UCRowIterator extends ColGroupRowIterator
 	{
-		private final int _ru;
-		private int _rpos = -1;
-		
 		public UCRowIterator(int rl, int ru) {
-			_ru = ru;
-			_rpos = rl;
-		}
-		
-		@Override
-		public boolean hasNext() {
-			return (_rpos < _ru);
+			//do nothing
 		}
 		
 		@Override
-		public void next(double[] buff) {
+		public void next(double[] buff, int rowIx, int segIx, boolean last) {
 			//copy entire dense/sparse row
 			if( _data.isAllocated() ) {
 				if( _data.isInSparseFormat() ) {
-					if( !_data.getSparseBlock().isEmpty(_rpos) ) {
+					if( !_data.getSparseBlock().isEmpty(rowIx) ) {
 						SparseBlock sblock = _data.getSparseBlock();
-						int apos = sblock.pos(_rpos);
-						int alen = sblock.size(_rpos);
-						int[] aix = sblock.indexes(_rpos);
-						double[] avals = sblock.values(_rpos);
+						int apos = sblock.pos(rowIx);
+						int alen = sblock.size(rowIx);
+						int[] aix = sblock.indexes(rowIx);
+						double[] avals = sblock.values(rowIx);
 						for(int k=apos; k<apos+alen; k++)
 							buff[_colIndexes[aix[k]]] = avals[k];
 					}
@@ -500,12 +491,10 @@ public class ColGroupUncompressed extends ColGroup
 				else {
 					final int clen = getNumCols();
 					double[] a = _data.getDenseBlock();
-					for(int j=0, aix=_rpos*clen; j<clen; j++)
+					for(int j=0, aix=rowIx*clen; j<clen; j++)
 						buff[_colIndexes[j]] = a[aix+j];
 				}
 			}
-			//advance position to next row
-			_rpos++;
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/900d8c92/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index 98529c8..57cc7bb 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -2417,10 +2417,14 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		
 		@Override
 		public double[] next() {
+			//prepare meta data common across column groups
+			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+			final int ix = _rpos % blksz;
+			final boolean last = (_rpos+1 == _ru);
 			//copy group rows into consolidated row
 			Arrays.fill(_ret, 0);
 			for(int j=0; j<_iters.length; j++)
-				_iters[j].next(_ret);
+				_iters[j].next(_ret, _rpos, ix, last);
 			//advance to next row and return buffer
 			_rpos++;
 			return _ret;
@@ -2438,10 +2442,14 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		
 		@Override
 		public SparseRow next() {
+			//prepare meta data common across column groups
+			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+			final int ix = _rpos % blksz;
+			final boolean last = (_rpos+1 == _ru);
 			//copy group rows into consolidated dense vector
 			//to avoid binary search+shifting or final sort
 			for(int j=0; j<_iters.length; j++)
-				_iters[j].next(_tmp);
+				_iters[j].next(_tmp, _rpos, ix, last);
 			//append non-zero values to consolidated sparse row
 			_ret.setSize(0);
 			for(int i=0; i<_tmp.length; i++)