You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/14 22:20:55 UTC

systemml git commit: [SYSTEMML-1906] Additional tuning codegen row ops over compressed data

Repository: systemml
Updated Branches:
  refs/heads/master 34c8a9231 -> 3c7a6eb70


[SYSTEMML-1906] Additional tuning codegen row ops over compressed data

This patch further improves upon the previous performance enhancements
for codegen row-wise operations over compressed matrices. In detail,
this includes (1) improved memory efficiency due to fewer partial
buffers, (2) the elimination of redundant partial row copies, and (3)
the elimination of an unnecessary initialization scan of the code vector
per segment.

Together these changes improved the performance over Airline78 and
Mnist8m from 995ms to 735ms and from 22s to 15.2s, respectively.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3c7a6eb7
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3c7a6eb7
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3c7a6eb7

Branch: refs/heads/master
Commit: 3c7a6eb70a160e338ce3204c052a6930de804c67
Parents: 34c8a92
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Sep 14 15:20:25 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Sep 14 15:21:01 2017 -0700

----------------------------------------------------------------------
 .../apache/sysml/runtime/compress/ColGroup.java | 12 +++++++-
 .../sysml/runtime/compress/ColGroupDDC.java     | 16 +++++------
 .../sysml/runtime/compress/ColGroupOLE.java     | 30 +++++++++-----------
 .../sysml/runtime/compress/ColGroupRLE.java     | 19 ++++++-------
 .../runtime/compress/ColGroupUncompressed.java  | 17 +++++------
 .../runtime/compress/CompressedMatrixBlock.java | 23 ++++++---------
 6 files changed, 56 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index ba6509c..d6187e9 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -281,7 +281,7 @@ public abstract class ColGroup implements Serializable
 	 * @param ru row upper index, exclusive
 	 * @return an iterator instance
 	 */
-	public abstract Iterator<double[]> getRowIterator(int rl, int ru);
+	public abstract ColGroupRowIterator getRowIterator(int rl, int ru);
 	
 	/**
 	 * Count the number of non-zeros per row
@@ -291,4 +291,14 @@ public abstract class ColGroup implements Serializable
  	 * @param ru row upper bound, exclusive
 	 */
 	protected abstract void countNonZerosPerRow(int[] rnnz, int rl, int ru);
+
+	/**
+	 * Base class for column group row iterators. We do not
+	 * implement the default Iterator interface in order to
+	 * avoid unnecessary value copies per group.
+	 */
+	protected abstract class ColGroupRowIterator {
+		public abstract boolean hasNext();
+		public abstract void next(double[] buff);
+	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
index 3618651..492f0b0 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
@@ -256,7 +256,7 @@ public abstract class ColGroupDDC extends ColGroupValue
 	}
 	
 	@Override
-	public Iterator<double[]> getRowIterator(int rl, int ru) {
+	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 		return new DDCRowIterator(rl, ru);
 	}
 	
@@ -305,12 +305,9 @@ public abstract class ColGroupDDC extends ColGroupValue
 		}
 	}
 	
-	private class DDCRowIterator implements Iterator<double[]>
+	private class DDCRowIterator extends ColGroupRowIterator
 	{
-		//iterator configuration 
 		private final int _ru;
-		//iterator state
-		private final double[] _buff = new double[getNumCols()]; 
 		private int _rpos = -1;
 		
 		public DDCRowIterator(int rl, int ru) {
@@ -324,13 +321,14 @@ public abstract class ColGroupDDC extends ColGroupValue
 		}
 
 		@Override
-		public double[] next() {
-			//copy entire value tuple and 
+		public void next(double[] buff) {
+			//copy entire value tuple to output row
 			final int clen = getNumCols();
-			System.arraycopy(getValues(), getCode(_rpos)*clen, _buff, 0, clen);
+			final int off = getCode(_rpos)*clen;
+			for(int j=0; j<clen; j++)
+				buff[_colIndexes[j]] = _values[off+j];
 			//advance position to next row
 			_rpos++;
-			return _buff;
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
index 1f2cd50..6a4d3e8 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -782,7 +782,7 @@ public class ColGroupOLE extends ColGroupOffset
 	}
 	
 	@Override
-	public Iterator<double[]> getRowIterator(int rl, int ru) {
+	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 		return new OLERowIterator(rl, ru);
 	}
 	
@@ -854,12 +854,11 @@ public class ColGroupOLE extends ColGroupOffset
 		}
 	}
 	
-	private class OLERowIterator implements Iterator<double[]>
+	private class OLERowIterator extends ColGroupRowIterator
 	{
 		//iterator configuration 
 		private final int _ru;
 		//iterator state
-		private final double[] _buff = new double[getNumCols()]; 
 		private final int[] _apos;
 		private final int[] _vcodes;
 		private int _rpos = -1;
@@ -869,6 +868,7 @@ public class ColGroupOLE extends ColGroupOffset
 			_rpos = rl;
 			_apos = skipScan(getNumValues(), rl);
 			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru-rl)];
+			Arrays.fill(_vcodes, -1); //initial reset
 			getNextSegment();
 		}
 		
@@ -878,36 +878,34 @@ public class ColGroupOLE extends ColGroupOffset
 		}
 		
 		@Override
-		public double[] next() {
+		public void next(double[] buff) {
 			//copy entire value tuple or reset to zero
 			int ix = _rpos%BitmapEncoder.BITMAP_BLOCK_SZ;
 			final int clen = getNumCols();
-			if( _vcodes[ix] >= 0 )
-				System.arraycopy(getValues(), _vcodes[ix]*clen, _buff, 0, clen);
-			else
-				Arrays.fill(_buff, 0);
+			for(int j=0, off=_vcodes[ix]*clen; j<clen; j++)
+				if( _vcodes[ix] >= 0 )
+					buff[_colIndexes[j]] = _values[off+j];
+			//reset vcode to avoid scan on next segment
+			_vcodes[ix] = -1;
 			//advance position to next row
 			_rpos++;
 			if( _rpos%BitmapEncoder.BITMAP_BLOCK_SZ==0 && _rpos<_ru )
 				getNextSegment();
-			return _buff;
 		}
 		
 		public void getNextSegment() {
 			//materialize value codes for entire segment in a 
 			//single pass over all values (store value code by pos)
-			Arrays.fill(_vcodes, -1);
 			final int numVals = getNumValues();
 			for (int k = 0; k < numVals; k++)  {
 				int boff = _ptr[k];
 				int blen = len(k);
 				int bix = _apos[k];
-				if( bix < blen ) {
-					int slen = _data[boff+bix];
-					for(int blckIx = 1; blckIx <= slen; blckIx++)
-						_vcodes[_data[boff+bix + blckIx]] = k;
-					_apos[k] += slen+1;
-				}
+				if( bix >= blen ) continue;
+				int slen = _data[boff+bix];
+				for(int i=0, off=boff+bix+1; i<slen; i++)
+					_vcodes[_data[off+i]] = k;
+				_apos[k] += slen+1;
 			}
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
index 478fd31..9b0bfc4 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
@@ -797,7 +797,7 @@ public class ColGroupRLE extends ColGroupOffset
 	}
 	
 	@Override
-	public Iterator<double[]> getRowIterator(int rl, int ru) {
+	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 		return new RLERowIterator(rl, ru);
 	}
 	
@@ -855,12 +855,11 @@ public class ColGroupRLE extends ColGroupOffset
 		}
 	}
 	
-	private class RLERowIterator implements Iterator<double[]>
+	private class RLERowIterator extends ColGroupRowIterator
 	{
 		//iterator configuration 
 		private final int _ru;
 		//iterator state
-		private final double[] _buff = new double[getNumCols()];
 		private final int[] _astart;
 		private final int[] _apos;
 		private final int[] _vcodes;
@@ -872,6 +871,7 @@ public class ColGroupRLE extends ColGroupOffset
 			_astart = new int[getNumValues()];
 			_apos = skipScan(getNumValues(), rl, _astart);
 			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru-rl)];
+			Arrays.fill(_vcodes, -1); //initial reset
 			getNextSegment();
 		}
 		
@@ -881,25 +881,24 @@ public class ColGroupRLE extends ColGroupOffset
 		}
 		
 		@Override
-		public double[] next() {
+		public void next(double[] buff) {
 			//copy entire value tuple or reset to zero
 			int ix = _rpos%BitmapEncoder.BITMAP_BLOCK_SZ;
 			final int clen = getNumCols();
-			if( _vcodes[ix] >= 0 )
-				System.arraycopy(getValues(), _vcodes[ix]*clen, _buff, 0, clen);
-			else
-				Arrays.fill(_buff, 0);
+			for(int j=0, off=_vcodes[ix]*clen; j<clen; j++)
+				if( _vcodes[ix] >= 0 )
+					buff[_colIndexes[j]] = _values[off+j];
+			//reset vcode to avoid scan on next segment
+			_vcodes[ix] = -1;
 			//advance position to next row
 			_rpos++;
 			if( _rpos%BitmapEncoder.BITMAP_BLOCK_SZ==0 && _rpos<_ru )
 				getNextSegment();
-			return _buff;
 		}
 		
 		public void getNextSegment() {
 			//materialize value codes for entire segment in a 
 			//single pass over all values (store value code by pos)
-			Arrays.fill(_vcodes, -1);
 			final int numVals = getNumValues();
 			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
 			for (int k = 0; k < numVals; k++) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index b27215a..8f85574 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -418,7 +418,7 @@ public class ColGroupUncompressed extends ColGroup
 	}
 	
 	@Override
-	public Iterator<double[]> getRowIterator(int rl, int ru) {
+	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 		return new UCRowIterator(rl, ru);
 	}
 	
@@ -467,12 +467,9 @@ public class ColGroupUncompressed extends ColGroup
 		}
 	}
 	
-	private class UCRowIterator implements Iterator<double[]>
+	private class UCRowIterator extends ColGroupRowIterator
 	{
-		//iterator configuration
 		private final int _ru;
-		//iterator state
-		private final double[] _buff = new double[getNumCols()];
 		private int _rpos = -1;
 		
 		public UCRowIterator(int rl, int ru) {
@@ -486,11 +483,10 @@ public class ColGroupUncompressed extends ColGroup
 		}
 		
 		@Override
-		public double[] next() {
+		public void next(double[] buff) {
 			//copy entire dense/sparse row
 			if( _data.isAllocated() ) {
 				if( _data.isInSparseFormat() ) {
-					Arrays.fill(_buff, 0); //reset
 					if( !_data.getSparseBlock().isEmpty(_rpos) ) {
 						SparseBlock sblock = _data.getSparseBlock();
 						int apos = sblock.pos(_rpos);
@@ -498,17 +494,18 @@ public class ColGroupUncompressed extends ColGroup
 						int[] aix = sblock.indexes(_rpos);
 						double[] avals = sblock.values(_rpos);
 						for(int k=apos; k<apos+alen; k++)
-							_buff[aix[k]] = avals[k];
+							buff[_colIndexes[aix[k]]] = avals[k];
 					}
 				}
 				else {
 					final int clen = getNumCols();
-					System.arraycopy(_data.getDenseBlock(), _rpos*clen, _buff, 0, clen);
+					double[] a = _data.getDenseBlock();
+					for(int j=0, aix=_rpos*clen; j<clen; j++)
+						buff[_colIndexes[j]] = a[aix+j];
 				}
 			}
 			//advance position to next row
 			_rpos++;
-			return _buff;
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3c7a6eb7/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index 3299594..b95b3af 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -47,6 +47,7 @@ import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.lops.MMTSJ.MMTSJType;
 import org.apache.sysml.lops.MapMultChain.ChainType;
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.ColGroup.ColGroupRowIterator;
 import org.apache.sysml.runtime.compress.ColGroup.CompressionType;
 import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder;
 import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
@@ -2305,16 +2306,15 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		protected final int _ru;
 		
 		//iterator state
-		protected Iterator<double[]>[] _iters = null;
+		protected ColGroupRowIterator[] _iters = null;
 		protected int _rpos;
 		
-		@SuppressWarnings("unchecked")
 		public RowIterator(int rl, int ru) {
 			_rl = rl;
 			_ru = ru;
 			
 			//initialize array of column group iterators
-			_iters = new Iterator[_colGroups.size()];
+			_iters = new ColGroupRowIterator[_colGroups.size()];
 			for( int i=0; i<_colGroups.size(); i++ )
 				_iters[i] = _colGroups.get(i).getRowIterator(_rl, _ru);
 			
@@ -2339,12 +2339,9 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		@Override
 		public double[] next() {
 			//copy group rows into consolidated row
-			for(int j=0; j<_iters.length; j++) {
-				ColGroup grp = _colGroups.get(j);
-				double[] row = _iters[j].next();
-				for( int k=0; k<row.length; k++ )
-					_ret[grp.getColIndex(k)] = row[k];
-			}
+			Arrays.fill(_ret, 0);
+			for(int j=0; j<_iters.length; j++)
+				_iters[j].next(_ret);
 			//advance to next row and return buffer
 			_rpos++;
 			return _ret;
@@ -2364,12 +2361,8 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		public SparseRow next() {
 			//copy group rows into consolidated dense vector
 			//to avoid binary search+shifting or final sort
-			for(int j=0; j<_iters.length; j++) {
-				ColGroup grp = _colGroups.get(j);
-				double[] row = _iters[j].next();
-				for( int k=0; k<row.length; k++ )
-					_tmp[grp.getColIndex(k)] = row[k];
-			}
+			for(int j=0; j<_iters.length; j++)
+				_iters[j].next(_tmp);
 			//append non-zero values to consolidated sparse row
 			_ret.setSize(0);
 			for(int i=0; i<_tmp.length; i++)