You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/06/01 06:19:00 UTC

incubator-systemml git commit: [SYSTEMML-1289] Codegen row-wise operations over compressed matrices

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 772fb5883 -> d2b9e5022


[SYSTEMML-1289] Codegen row-wise operations over compressed matrices

This patch extends the codegen rowwise template with support for
compressed matrices. Since this template requires access to entire rows,
we accordingly introduce new dense and sparse row iterators over
compressed matrices. Furthermore, this also includes a generalization of
the underlying column group iterators, which now support row-major
access order if requested.

Additionally, this fixes recently introduced compilation warnings in the
GPU backend.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d2b9e502
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d2b9e502
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d2b9e502

Branch: refs/heads/master
Commit: d2b9e50228fa72a9657c3fc22eaf05bca3cab984
Parents: 772fb58
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 31 19:26:06 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 31 23:18:23 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/codegen/SpoofRowwise.java     |  45 +++-
 .../apache/sysml/runtime/compress/ColGroup.java |   3 +-
 .../sysml/runtime/compress/ColGroupDDC.java     |   3 +-
 .../sysml/runtime/compress/ColGroupOffset.java  |  99 ++++++-
 .../runtime/compress/ColGroupUncompressed.java  |  13 +-
 .../sysml/runtime/compress/ColGroupValue.java   |   4 +
 .../runtime/compress/CompressedMatrixBlock.java | 120 ++++++++-
 .../instructions/gpu/context/GPUContext.java    |   4 -
 .../runtime/matrix/data/LibMatrixCUDA.java      |   1 +
 .../codegen/CompressedRowAggregateTest.java     | 267 +++++++++++++++++++
 .../codegen/CompressedRowAggregateMain.R        |  34 +++
 .../codegen/CompressedRowAggregateMain.dml      |  31 +++
 .../functions/codegen/ZPackageSuite.java        |   1 +
 13 files changed, 598 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index e793345..09d5b29 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -20,6 +20,7 @@
 package org.apache.sysml.runtime.codegen;
 
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
@@ -27,10 +28,12 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseRow;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
 
@@ -101,10 +104,13 @@ public abstract class SpoofRowwise extends SpoofOperator
 			LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n);
 		
 		//core sequential execute
-		if( !inputs.get(0).isInSparseFormat() )
-			executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, n, 0, m);
+		MatrixBlock a = inputs.get(0);
+		if( a instanceof CompressedMatrixBlock )
+			executeCompressed((CompressedMatrixBlock)a, b, scalars, c, n, 0, m);
+		else if( !a.isInSparseFormat() )
+			executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m);
 		else
-			executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, n, 0, m);
+			executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m);
 	
 		//post-processing
 		if( allocTmp )
@@ -171,7 +177,7 @@ public abstract class SpoofRowwise extends SpoofOperator
 		}
 		catch(Exception ex) {
 			throw new DMLRuntimeException(ex);
-		}	
+		}
 	}
 	
 	private void allocateOutputMatrix(int m, int n, MatrixBlock out) {
@@ -213,6 +219,29 @@ public abstract class SpoofRowwise extends SpoofOperator
 		}
 	}
 	
+	private void executeCompressed(CompressedMatrixBlock a, double[][] b, double[] scalars, double[] c, int n, int rl, int ru) 
+	{
+		if( a.isEmptyBlock(false) )
+			return;
+		
+		if( !a.isInSparseFormat() ) { //DENSE
+			Iterator<double[]> iter = a.getDenseRowIterator(rl, ru);
+			for( int i=rl; iter.hasNext(); i++ ) {
+				genexecRowDense(iter.next(), 0, b, scalars, c, n, i);
+			}
+		}
+		else { //SPARSE
+			Iterator<SparseRow> iter = a.getSparseRowIterator(rl, ru);
+			for( int i=rl; iter.hasNext(); i++ ) {
+				SparseRow row = iter.next();
+				if( !row.isEmpty() ) {
+					genexecRowSparse(row.values(), row.indexes(), 
+						0, b, scalars, c, row.size(), i);
+				}
+			}
+		}
+	}
+	
 	//methods to be implemented by generated operators of type SpoofRowAggrgate 
 	
 	protected abstract void genexecRowDense( double[] a, int ai, double[][] b, double[] scalars, double[] c, int len, int rowIndex );
@@ -248,7 +277,9 @@ public abstract class SpoofRowwise extends SpoofOperator
 			LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen);
 			double[] c = new double[_clen];
 			
-			if( !_a.isInSparseFormat() )
+			if( _a instanceof CompressedMatrixBlock )
+				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru);
+			else if( !_a.isInSparseFormat() )
 				executeDense(_a.getDenseBlock(), _b, _scalars, c, _clen, _rl, _ru);
 			else
 				executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru);
@@ -286,7 +317,9 @@ public abstract class SpoofRowwise extends SpoofOperator
 			//allocate vector intermediates
 			LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen);
 			
-			if( !_a.isInSparseFormat() )
+			if( _a instanceof CompressedMatrixBlock )
+				executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+			else if( !_a.isInSparseFormat() )
 				executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
 			else
 				executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index 2f9d1de..dbed2b4 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -261,7 +261,8 @@ public abstract class ColGroup implements Serializable
 	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
 		throws DMLRuntimeException;
 	
-	public abstract Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros);
+	public abstract Iterator<IJV> getIterator(int rl, int ru,
+			boolean inclZeros, boolean rowMajor);
 	
 	/**
 	 * Count the number of non-zeros per row

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
index 9a4a982..4bf7c20 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
@@ -239,7 +239,8 @@ public abstract class ColGroupDDC extends ColGroupValue
 	}
 	
 	@Override
-	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros) {
+	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
+		//DDC iterator is always row major, so no need for custom handling
 		return new DDCIterator(rl, ru, inclZeros);
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
index bc0b7f1..98bbdb8 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
@@ -23,6 +23,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
@@ -413,8 +414,11 @@ public abstract class ColGroupOffset extends ColGroupValue
 	protected abstract boolean[] computeZeroIndicatorVector();
 	
 	@Override
-	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros) {
-		return new OffsetIterator(rl, ru, inclZeros);
+	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
+		if( rowMajor )
+			return new OffsetRowIterator(rl, ru, inclZeros);
+		else
+			return new OffsetValueIterator(rl, ru, inclZeros);
 	}
 	
 	/**
@@ -433,7 +437,7 @@ public abstract class ColGroupOffset extends ColGroupValue
 	public abstract Iterator<Integer> getIterator(int k, int rl, int ru);
 
 	
-	protected class OffsetIterator implements Iterator<IJV>
+	protected class OffsetValueIterator implements Iterator<IJV>
 	{
 		//iterator configuration
 		private final int _rl;
@@ -447,7 +451,7 @@ public abstract class ColGroupOffset extends ColGroupValue
 		private int _rpos = -1;
 		private int _cpos = -1;
 		
-		public OffsetIterator(int rl, int ru, boolean inclZeros) {
+		public OffsetValueIterator(int rl, int ru, boolean inclZeros) {
 			_rl = rl;
 			_ru = ru;
 			_inclZeros = inclZeros;
@@ -464,6 +468,8 @@ public abstract class ColGroupOffset extends ColGroupValue
 		
 		@Override
 		public IJV next() {
+			if( !hasNext() )
+				throw new RuntimeException("No more offset entries.");
 			_buff.set(_rpos, _colIndexes[_cpos], (_vpos >= getNumValues()) ? 
 				0 : _values[_vpos*getNumCols()+_cpos]);
 			getNextValue();
@@ -472,7 +478,7 @@ public abstract class ColGroupOffset extends ColGroupValue
 		
 		private void getNextValue() {
 			//advance to next value iterator if required
-			if(_viter != null && _viter instanceof ZeroIterator && !_viter.hasNext() ) {
+			if(_viter != null && _viter instanceof ZeroValueIterator && !_viter.hasNext() ) {
 				_rpos = _ru; //end after zero iterator
 				return;
 			}
@@ -483,7 +489,7 @@ public abstract class ColGroupOffset extends ColGroupValue
 					if( _vpos < getNumValues() )
 						_viter = getIterator(_vpos, _rl, _ru);
 					else if( _inclZeros && _zeros)
-						_viter = new ZeroIterator(_rl, _ru);
+						_viter = new ZeroValueIterator(_rl, _ru);
 					else {
 						_rpos = _ru; //end w/o zero iterator
 						return;
@@ -504,13 +510,13 @@ public abstract class ColGroupOffset extends ColGroupValue
 		}
 	}
 	
-	protected class ZeroIterator implements Iterator<Integer>
+	protected class ZeroValueIterator implements Iterator<Integer>
 	{
 		private final boolean[] _zeros;
 		private final int _ru;
 		private int _rpos; 
 		
-		public ZeroIterator(int rl, int ru) {
+		public ZeroValueIterator(int rl, int ru) {
 			_zeros = computeZeroIndicatorVector();
 			_ru = ru;
 			_rpos = rl-1;
@@ -534,4 +540,81 @@ public abstract class ColGroupOffset extends ColGroupValue
 			while( _rpos < _ru && !_zeros[_rpos] );
 		}
 	}
+	
+	protected class OffsetRowIterator implements Iterator<IJV>
+	{
+		//iterator configuration
+		private final int _rl;
+		private final int _ru;
+		private final boolean _inclZeros;
+		
+		//iterator state
+		private final Iterator<Integer>[] _iters;
+		private final IJV _ret = new IJV(); 
+		private final HashMap<Integer,Integer> _ixbuff = 
+			new HashMap<Integer,Integer>(); //<rowid-value>
+		private int _rpos;
+		private int _cpos;
+		private int _vpos;
+		
+		@SuppressWarnings("unchecked")
+		public OffsetRowIterator(int rl, int ru, boolean inclZeros) {
+			_rl = rl;
+			_ru = ru;
+			_inclZeros = inclZeros;
+			
+			//initialize array of column group iterators
+			_iters = new Iterator[getNumValues()];
+			for( int k=0; k<getNumValues(); k++ )
+				_iters[k] = getIterator(k, _rl, _ru);
+			
+			//initialize O(1)-lookup for next value
+			for( int k=0; k<getNumValues(); k++ ) {
+				_ixbuff.put(_iters[k].hasNext() ? 
+						_iters[k].next() : _ru+k, k);
+			}
+			
+			//get initial row
+			_rpos = rl-1;
+			_cpos = getNumCols()-1;
+			getNextValue();
+		}
+		
+		@Override
+		public boolean hasNext() {
+			return (_rpos < _ru);
+		}
+		
+		@Override
+		public IJV next() {
+			if( !hasNext() )
+				throw new RuntimeException("No more offset entries.");
+			_ret.set(_rpos, _colIndexes[_cpos], 
+				(_vpos<0) ? 0 : getValue(_vpos, _cpos));
+			getNextValue();
+			return _ret;
+		}
+		
+		private void getNextValue() {
+			do {
+				//read iterators if necessary
+				if( _cpos+1 >= getNumCols() ) {
+					_rpos++; _cpos = -1; _vpos = -1;
+					//direct lookup of single value to pull next index
+					Integer ktmp = _ixbuff.remove(_rpos);
+					if( ktmp != null ) {
+						_ixbuff.put(_iters[ktmp].hasNext() ? 
+								_iters[ktmp].next() : _ru+ktmp, ktmp);
+						_vpos = ktmp;
+					}
+				}
+				//check for end of row partition
+				if( _rpos >= _ru )
+					return;
+				_cpos++;
+			}
+			while( !_inclZeros && (_vpos < 0 
+				|| getValue(_vpos, _cpos)==0) );
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index 526df16..7e54e4e 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -411,18 +411,19 @@ public class ColGroupUncompressed extends ColGroup
 	}
 	
 	@Override
-	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros) {
+	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
+		//UC iterator is always row major, so no need for custom handling
 		return new UCIterator(rl, ru, inclZeros);
 	}
 	
 	private class UCIterator implements Iterator<IJV>
 	{
-		//iterator configuration 
+		//iterator configuration
 		private final int _ru;
 		private final boolean _inclZeros;
 		
 		//iterator state
-		private final IJV _buff = new IJV(); 
+		private final IJV _buff = new IJV();
 		private int _rpos = -1;
 		private int _cpos = -1;
 		private double _value = 0;
@@ -434,12 +435,12 @@ public class ColGroupUncompressed extends ColGroup
 			_cpos = -1;
 			getNextValue();
 		}
-
+		
 		@Override
 		public boolean hasNext() {
 			return (_rpos < _ru);
 		}
-
+		
 		@Override
 		public IJV next() {
 			_buff.set(_rpos, _colIndexes[_cpos], _value);
@@ -456,7 +457,7 @@ public class ColGroupUncompressed extends ColGroup
 					return; //reached end
 				_value = _data.quickGetValue(_rpos, _cpos);
 			}
-			while( !_inclZeros && _value==0);
+			while( !_inclZeros && _value==0 );
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
index 79d963e..4f651eb 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
@@ -128,6 +128,10 @@ public abstract class ColGroupValue extends ColGroup
 		return _values;
 	}
 	
+	public double getValue(int k, int col) {
+		return _values[k*getNumCols()+col];
+	}
+	
 	public MatrixBlock getValuesAsBlock() {
 		boolean containsZeros = (this instanceof ColGroupOffset) ?
 			((ColGroupOffset)this)._zeros : false;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index ca22b63..d141d85 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -77,6 +77,8 @@ import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysml.runtime.matrix.data.MatrixValue;
 import org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseRow;
+import org.apache.sysml.runtime.matrix.data.SparseRowVector;
 import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
 import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
 import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
@@ -812,6 +814,14 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		return new ColumnGroupIterator(rl, ru, cgl, cgu, inclZeros);
 	}
 	
+	public Iterator<double[]> getDenseRowIterator(int rl, int ru) {
+		return new DenseRowIterator(rl, ru);
+	}
+	
+	public Iterator<SparseRow> getSparseRowIterator(int rl, int ru) {
+		return new SparseRowIterator(rl, ru);
+	}
+	
 	public int[] countNonZerosPerRow(int rl, int ru) {
 		int[] rnnz = new int[ru-rl];
 		for (ColGroup grp : _colGroups)
@@ -2277,11 +2287,119 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			while( _posColGroup+1 < _cgu ) {
 				_posColGroup++;
 				_iterColGroup = _colGroups.get(_posColGroup)
-					.getIterator(_rl, _ru, _inclZeros);
+					.getIterator(_rl, _ru, _inclZeros, false);
 				if( _iterColGroup.hasNext() )
 					return;
 			}
 			_noNext = true;
 		}
 	}
+	
+	private abstract class RowIterator<T> implements Iterator<T>
+	{
+		//iterator configuration 
+		protected final int _rl;
+		protected final int _ru;
+		
+		//iterator state
+		private Iterator<IJV>[] _iters = null;
+		protected final int[] _ixbuff = new int[clen];
+		protected final double[] _vbuff = new double[clen];
+		protected int _rpos;
+		
+		@SuppressWarnings("unchecked")
+		public RowIterator(int rl, int ru) {
+			_rl = rl;
+			_ru = ru;
+			
+			//initialize array of column group iterators
+			_iters = new Iterator[_colGroups.size()];
+			for( int i=0; i<_colGroups.size(); i++ )
+				_iters[i] = _colGroups.get(i).getIterator(
+					_rl, _ru, true, true);
+			Arrays.fill(_ixbuff, -1);
+			
+			//get initial row
+			_rpos = rl-1;
+			getNextRow();
+		}
+		
+		@Override
+		public boolean hasNext() {
+			return (_rpos < _ru);
+		}
+		
+		@Override
+		public abstract T next();
+		
+		protected void getNextRow() {
+			_rpos++;
+			//read iterators if necessary
+			for(int j=0; j<_iters.length; j++) {
+				ColGroup grp = _colGroups.get(j);
+				if( _ixbuff[grp.getColIndex(0)] < _rpos ) {
+					if( _iters[j].hasNext() ) {
+						for( int k=0; k<grp.getNumCols(); k++ ) {
+							IJV cell = _iters[j].next();
+							_ixbuff[cell.getJ()] = cell.getI();
+							_vbuff[cell.getJ()] = cell.getV();
+						}
+					}
+					else {
+						for( int k=0; k<grp.getNumCols(); k++ )
+							_ixbuff[grp.getColIndex(k)] = _ru;
+					}
+				}
+			}
+		}
+	}
+	
+	private class DenseRowIterator extends RowIterator<double[]>
+	{
+		private final double[] _ret = new double[clen];
+		
+		public DenseRowIterator(int rl, int ru) {
+			super(rl, ru);
+		}
+		
+		@Override
+		public double[] next() {
+			if( !hasNext() )
+				throw new RuntimeException("No more rows in row partition ["+_rl+","+_ru+")");
+			//copy currently buffered row entries
+			for( int j=0; j<clen; j++ )
+				_ret[j] = (_ixbuff[j] == _rpos) ? _vbuff[j] : 0;
+			//advance to next row and return buffer
+			getNextRow();
+			return _ret;
+		}
+	}
+	
+	private class SparseRowIterator extends RowIterator<SparseRow>
+	{
+		private final SparseRowVector _ret = new SparseRowVector(clen);
+		
+		public SparseRowIterator(int rl, int ru) {
+			super(rl, ru);
+		}
+
+		@Override
+		public boolean hasNext() {
+			return (_rpos < _ru);
+		}
+
+		@Override
+		public SparseRow next() {
+			if( !hasNext() )
+				throw new RuntimeException("No more rows in row partition ["+_rl+","+_ru+")");
+			//copy currently buffered row entries
+			_ret.setSize(0);
+			for( int j=0; j<clen; j++ )
+				if( _ixbuff[j] == _rpos )
+					_ret.append(j, _vbuff[j]);
+			//advance to next row and return buffer
+			getNextRow();
+			return _ret;
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index 89a2b67..8da67ea 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -158,7 +158,6 @@ public class GPUContext {
 
   }
 
-  @SuppressWarnings("unused")
   public int getDeviceNum() {
     return deviceNum;
   }
@@ -174,7 +173,6 @@ public class GPUContext {
     cudaSetDevice(deviceNum);
   }
 
-  @SuppressWarnings("unused")
   public static int cudaGetDevice() {
     int[] device = new int[1];
     JCuda.cudaGetDevice(device);
@@ -550,7 +548,6 @@ public class GPUContext {
    * @return the shared memory per block
    * @throws DMLRuntimeException ?
    */
-  @SuppressWarnings("unused")
   public long getMaxSharedMemory() throws DMLRuntimeException {
     cudaDeviceProp deviceProp = getGPUProperties();
     return deviceProp.sharedMemPerBlock;
@@ -596,7 +593,6 @@ public class GPUContext {
    *
    * @throws DMLRuntimeException if error
    */
-  @SuppressWarnings("unused")
   public void destroy() throws DMLRuntimeException {
     LOG.trace("GPU : this context was destroyed, this = " + this.toString());
     clearMemory();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 7990fef..2453b41 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -2643,6 +2643,7 @@ public class LibMatrixCUDA {
 		deviceCopy(instName, srcPtr, destPtr, (int)src.getNumRows(), (int)src.getNumColumns());
 	}
 
+	@SuppressWarnings("unused")
 	private static void compareAndSet(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, double compareVal,  double tolerance,
 																		double ifEqualsVal, double ifLessThanVal, double ifGreaterThanVal) throws DMLRuntimeException {
 		if (ec.getGPUContext() != gCtx)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedRowAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedRowAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedRowAggregateTest.java
new file mode 100644
index 0000000..02afa42
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedRowAggregateTest.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.io.File;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class CompressedRowAggregateTest extends AutomatedTestBase 
+{	
+	private static final String TEST_NAME1 = "CompressedRowAggregateMain";
+	private static final String TEST_DIR = "functions/codegen/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + CompressedRowAggregateTest.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen-compress.xml";
+	private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
+	
+	private static final int rows = 2023;
+	private static final int cols = 876;
+	private static final double sparsity1 = 0.9;
+	private static final double sparsity2 = 0.1;
+	private static final double sparsity3 = 0.0;
+	private static final double eps = Math.pow(10, -6);
+	
+	public enum SparsityType {
+		DENSE,
+		SPARSE,
+		EMPTY,
+	}
+	
+	public enum ValueType {
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
+	}
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) );
+	}
+		
+	@Test
+	public void testCompressedRowAggregateMainDenseConstCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.CONST, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRandCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRand2CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRand3CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseConstCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.CONST, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRandCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRand2CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRand3CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyConstCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.CONST, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRandCP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRand2CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_DDC, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRand3CP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.CP );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseConstSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.CONST, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRandSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRand2SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainDenseRand3SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseConstSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.CONST, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRandSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRand2SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainSparseRand3SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyConstSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.CONST, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRandSP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRand2SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+	}
+	
+	@Test
+	public void testCompressedRowAggregateMainEmptyRand3SP() {
+		testCompressedRowAggregate( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+	}
+	
+	private void testCompressedRowAggregate(String testname, SparsityType stype, ValueType vtype, ExecType et)
+	{	
+		boolean oldRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( et ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+		
+		try
+		{
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = true;
+			TestConfiguration config = getTestConfiguration(testname);
+			loadTestConfiguration(config);
+			
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + testname + ".dml";
+			programArgs = new String[]{"-explain", "-stats", 
+					"-args", input("X"), output("R") };
+			
+			fullRScriptName = HOME + testname + ".R";
+			rCmd = getRCmd(inputDir(), expectedDir());			
+
+			//generate input data
+			double sparsity = -1;
+			switch( stype ){
+				case DENSE: sparsity = sparsity1; break;
+				case SPARSE: sparsity = sparsity2; break;
+				case EMPTY: sparsity = sparsity3; break;
+			}
+			
+			//generate input data
+			double min = (vtype==ValueType.CONST)? 10 : -10;
+			double[][] X = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
+				X = TestUtils.round(X);
+			}
+			writeInputMatrixWithMTD("X", X, true);
+			
+			//run tests
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("R");	
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoofRA") 
+				|| heavyHittersContainsSubString("sp_spoofRA"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
+	}	
+
+	/**
+	 * Override default configuration with custom test configuration to ensure
+	 * scratch space and local temporary directory locations are also updated.
+	 */
+	@Override
+	protected File getConfigTemplateFile() {
+		// Instrumentation in this test's output log to show custom configuration file used for template.
+		System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath());
+		return TEST_CONF_FILE;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/test/scripts/functions/codegen/CompressedRowAggregateMain.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/CompressedRowAggregateMain.R b/src/test/scripts/functions/codegen/CompressedRowAggregateMain.R
new file mode 100644
index 0000000..44e7ec5
--- /dev/null
+++ b/src/test/scripts/functions/codegen/CompressedRowAggregateMain.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cmdArgs <- commandArgs(TRUE)
+library("Matrix")
+library("matrixStats")
+# Read the input matrix X; the first argument is used as path prefix for "X.mtx".
+X <- readMM(paste0(cmdArgs[1], "X.mtx"))
+colIdx <- seq(1, ncol(X))
+rowIdx <- seq(1, nrow(X))
+# Two chains over X: t(X) %*% (X %*% v), and the same with the inner product scaled by the row index.
+plainAgg <- t(X) %*% (X %*% colIdx)
+weightedAgg <- t(X) %*% (rowIdx * (X %*% colIdx))
+R <- (plainAgg + weightedAgg) / 1e4
+# Write the result as a sparse matrix; the second argument is used as path prefix for "R".
+writeMM(as(R, "CsparseMatrix"), paste0(cmdArgs[2], "R"))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/test/scripts/functions/codegen/CompressedRowAggregateMain.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/CompressedRowAggregateMain.dml b/src/test/scripts/functions/codegen/CompressedRowAggregateMain.dml
new file mode 100644
index 0000000..3c82181
--- /dev/null
+++ b/src/test/scripts/functions/codegen/CompressedRowAggregateMain.dml
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1)
+v = seq(1, ncol(X));
+w = seq(1, nrow(X));
+# Two chains over X follow. NOTE(review): the empty if(1==1) block between them presumably forces a statement-block cut -- confirm.
+R1 = t(X) %*% (X %*% v);
+if(1==1) {}
+R2 = t(X) %*% (w * (X %*% v));
+R = (R1 + R2) / 1e4;
+# Write the combined result, scaled by 1e4, to the output location passed as $2.
+write(R, $2)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d2b9e502/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
index ea8a1f1..2aa0b3a 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
@@ -36,6 +36,7 @@ import org.junit.runners.Suite;
 	CompressedCellwiseTest.class,
 	CompressedMultiAggregateTest.class,
 	CompressedOuterProductTest.class,
+	CompressedRowAggregateTest.class,
 	DAGCellwiseTmplTest.class,
 	MultiAggTmplTest.class,
 	OuterProdTmplTest.class,