Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/08 02:22:26 UTC

[1/5] incubator-systemml git commit: [SYSTEMML-449] Compressed linear algebra v2

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 390b81c26 -> 37a215bc3


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixMultChainTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixMultChainTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixMultChainTest.java
index d87b42a..4a9d4f5 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixMultChainTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixMultChainTest.java
@@ -46,9 +46,10 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -72,13 +73,23 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtXv, true);
+	public void testDenseRoundRandDataOLENoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtXv, true);
+	public void testSparseRoundRandDataOLENoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCNoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCNoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, true);
 	}
 	
 	@Test
@@ -107,13 +118,13 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtXv, false);
+	public void testDenseRoundRandDataOLENoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtXv, false);
+	public void testSparseRoundRandDataOLENoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, false);
 	}
 	
 	@Test
@@ -142,13 +153,23 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtwXv, true);
+	public void testDenseRoundRandDataOLEWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataOLEWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtwXv, true);
+	public void testDenseRoundRandDataDDCWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ChainType.XtwXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ChainType.XtwXv, true);
 	}
 	
 	@Test
@@ -177,13 +198,13 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtwXv, false);
+	public void testDenseRoundRandDataOLEWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtwXv, false);
+	public void testSparseRoundRandDataOLEWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, false);
 	}
 	
 	@Test
@@ -215,8 +236,10 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector1 = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 0, 1, 1.0, 3));
@@ -243,5 +266,8 @@ public class ParMatrixMultChainTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }
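
Note on the test pattern above (repeated in the other Par*Test files in this
commit): RAND_ROUND is split into an OLE and a DDC variant, the static flag
CompressedMatrixBlock.ALLOW_DDC_ENCODING selects whether the compressor may
choose DDC, and a finally block restores the default so subsequent tests are
unaffected. A minimal standalone sketch of that pattern, with hypothetical
names standing in for the actual test harness:

	//Sketch only: toggle a global encoding flag for one test run and always
	//restore it, mirroring the ALLOW_DDC_ENCODING handling above.
	public class EncodingToggleSketch {
		//stand-in for CompressedMatrixBlock.ALLOW_DDC_ENCODING (assumed public static flag)
		public static boolean ALLOW_DDC_ENCODING = true;
		
		enum ValueType { RAND, CONST, RAND_ROUND_OLE, RAND_ROUND_DDC }
		
		static void runTest(ValueType vtype) {
			try {
				//only rounded random data is affected; DDC vs OLE is chosen via the flag
				if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC )
					ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
				//... generate data, compress, execute operation, compare results ...
			}
			finally {
				ALLOW_DDC_ENCODING = true; //always restore the default
			}
		}
	}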

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixVectorMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixVectorMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixVectorMultTest.java
index 2ec0ab8..30e57b4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixVectorMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParMatrixVectorMultTest.java
@@ -50,9 +50,10 @@ public class ParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -76,13 +77,23 @@ public class ParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -111,13 +122,13 @@ public class ParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -150,8 +161,10 @@ public class ParMatrixVectorMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 1, 1, 1.0, 3));
@@ -178,5 +191,8 @@ public class ParMatrixVectorMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParTransposeSelfLeftMatrixMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParTransposeSelfLeftMatrixMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParTransposeSelfLeftMatrixMultTest.java
index 4091315..a12588a 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParTransposeSelfLeftMatrixMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParTransposeSelfLeftMatrixMultTest.java
@@ -46,9 +46,10 @@ public class ParTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -72,13 +73,23 @@ public class ParTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -107,13 +118,13 @@ public class ParTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -146,8 +157,10 @@ public class ParTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -170,5 +183,8 @@ public class ParTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
index dde370d..ebc1159 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
@@ -48,9 +48,10 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	public enum AggType {
@@ -89,13 +90,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
 	}
 	
 	@Test
@@ -124,13 +135,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
@@ -159,13 +170,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
 	}
 	
 	@Test
@@ -194,13 +215,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
@@ -229,13 +250,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
 	}
 	
 	@Test
@@ -264,13 +295,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
@@ -299,13 +330,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
@@ -334,13 +375,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
@@ -369,13 +410,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
@@ -404,13 +455,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
@@ -439,13 +490,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
 	}
 	
 	@Test
@@ -474,13 +535,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
@@ -493,7 +554,6 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.CONST, AggType.SUMSQ, false);
 	}
 	
-
 	@Test
 	public void testRowMaxsDenseRandDataCompression() {
 		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND, AggType.ROWMAXS, true);
@@ -510,13 +570,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
 	}
 	
 	@Test
@@ -545,13 +615,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
@@ -580,13 +650,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
 	}
 	
 	@Test
@@ -615,13 +695,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
@@ -650,13 +730,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
 	}
 	
 	@Test
@@ -685,13 +775,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
@@ -720,13 +810,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
 	}
 	
 	@Test
@@ -755,13 +855,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
@@ -790,13 +890,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
 	}
 	
 	@Test
@@ -825,13 +935,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
@@ -860,13 +970,23 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
 	}
 	
 	@Test
@@ -895,13 +1015,13 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
@@ -933,8 +1053,10 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols1, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
 			
@@ -979,5 +1101,8 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParVectorMatrixMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParVectorMatrixMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParVectorMatrixMultTest.java
index bbf3dea..5281404 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParVectorMatrixMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParVectorMatrixMultTest.java
@@ -49,9 +49,10 @@ public class ParVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class ParVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class ParVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -149,8 +160,10 @@ public class ParVectorMatrixMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(1, rows, 1, 1, 1.0, 3));
@@ -177,5 +190,8 @@ public class ParVectorMatrixMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }



[4/5] incubator-systemml git commit: [SYSTEMML-449] Compressed linear algebra v2

Posted by mb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
index efdcc86..6d2ec43 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
@@ -22,31 +22,29 @@ package org.apache.sysml.runtime.compress;
 import java.util.Arrays;
 import java.util.Iterator;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.utils.ConverterUtils;
 import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
 import org.apache.sysml.runtime.functionobjects.Builtin;
 import org.apache.sysml.runtime.functionobjects.KahanFunction;
 import org.apache.sysml.runtime.functionobjects.KahanPlus;
-import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
-import org.apache.sysml.runtime.functionobjects.ReduceAll;
-import org.apache.sysml.runtime.functionobjects.ReduceCol;
-import org.apache.sysml.runtime.functionobjects.ReduceRow;
-import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
 import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.Pair;
-import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
 import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
 
 
 /** A group of columns compressed with a single run-length encoded bitmap. */
-public class ColGroupRLE extends ColGroupBitmap 
+public class ColGroupRLE extends ColGroupOffset 
 {
 	private static final long serialVersionUID = 7450232907594748177L;
 
+	private static final Log LOG = LogFactory.getLog(ColGroupRLE.class.getName());
+	
 	public ColGroupRLE() {
-		super(CompressionType.RLE_BITMAP);
+		super();
 	}
 	
 	/**
@@ -62,26 +60,37 @@ public class ColGroupRLE extends ColGroupBitmap
 	 */
 	public ColGroupRLE(int[] colIndices, int numRows, UncompressedBitmap ubm) 
 	{
-		super(CompressionType.RLE_BITMAP, colIndices, numRows, ubm);
+		super(colIndices, numRows, ubm);
 		
 		// compress the bitmaps
 		final int numVals = ubm.getNumValues();
 		char[][] lbitmaps = new char[numVals][];
 		int totalLen = 0;
 		for( int k=0; k<numVals; k++ ) {
-			lbitmaps[k] = BitmapEncoder.genRLEBitmap(ubm.getOffsetsList(k));
+			lbitmaps[k] = BitmapEncoder.genRLEBitmap(
+				ubm.getOffsetsList(k).extractValues(), ubm.getNumOffsets(k));
 			totalLen += lbitmaps[k].length;
 		}
 		
 		// compact bitmaps to linearized representation
 		createCompressedBitmaps(numVals, totalLen, lbitmaps);
+		
+		//debug output
+		double ucSize = MatrixBlock.estimateSizeDenseInMemory(numRows, colIndices.length);
+		if( estimateInMemorySize() > ucSize )
+			LOG.warn("RLE group larger than UC dense: "+estimateInMemorySize()+" "+ucSize);
 	}
 
 	public ColGroupRLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps, int[] bitmapOffs) {
-		super(CompressionType.RLE_BITMAP, colIndices, numRows, zeros, values);
+		super(colIndices, numRows, zeros, values);
 		_data = bitmaps;
 		_ptr = bitmapOffs;
 	}
+	
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.RLE_BITMAP;
+	}
 
 	@Override
 	public Iterator<Integer> getDecodeIterator(int k) {
@@ -247,7 +256,7 @@ public class ColGroupRLE extends ColGroupBitmap
 			//L3 cache alignment, see comment rightMultByVector OLE column group 
 			//core difference of RLE to OLE is that runs are not segment alignment,
 			//which requires care of handling runs crossing cache-buckets
-			final int blksz = ColGroupBitmap.WRITE_CACHE_BLKSZ; 
+			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ; 
 			
 			//step 1: prepare position and value arrays
 			
@@ -335,7 +344,7 @@ public class ColGroupRLE extends ColGroupBitmap
 		if( LOW_LEVEL_OPT && numVals > 1 
 			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
 		{
-			final int blksz = ColGroupBitmap.READ_CACHE_BLKSZ; 
+			final int blksz = ColGroupOffset.READ_CACHE_BLKSZ; 
 			
 			//step 1: prepare position and value arrays
 			
@@ -423,7 +432,7 @@ public class ColGroupRLE extends ColGroupBitmap
 		}
 		
 		double[] rvalues = applyScalarOp(op, val0, getNumCols());		
-		char[] lbitmap = BitmapEncoder.genRLEBitmap(loff);
+		char[] lbitmap = BitmapEncoder.genRLEBitmap(loff, loff.length);
 		char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length);
 		System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
 		int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1);
@@ -432,49 +441,9 @@ public class ColGroupRLE extends ColGroupBitmap
 		return new ColGroupRLE(_colIndexes, _numRows, loff.length<_numRows,
 				rvalues, rbitmaps, rbitmapOffs);
 	}
-	
-	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) 
-		throws DMLRuntimeException
-	{
-		unaryAggregateOperations(op, result, 0, getNumRows());
-	}
-	
-	
-	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) 
-		throws DMLRuntimeException 
-	{
-		//sum and sumsq (reduceall/reducerow over tuples and counts)
-		if( op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq ) 
-		{
-			KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ?
-					KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject();
-			
-			if( op.indexFn instanceof ReduceAll )
-				computeSum(result, kplus);
-			else if( op.indexFn instanceof ReduceCol )
-				computeRowSums(result, kplus, rl, ru);
-			else if( op.indexFn instanceof ReduceRow )
-				computeColSums(result, kplus);
-		}
-		//min and max (reduceall/reducerow over tuples only)
-		else if(op.aggOp.increOp.fn instanceof Builtin 
-				&& (((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX 
-				|| ((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MIN)) 
-		{		
-			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
 
-			if( op.indexFn instanceof ReduceAll )
-				computeMxx(result, builtin);
-			else if( op.indexFn instanceof ReduceCol )
-				computeRowMxx(result, builtin, rl, ru);
-			else if( op.indexFn instanceof ReduceRow )
-				computeColMxx(result, builtin);
-		}
-	}
-
-	private void computeSum(MatrixBlock result, KahanFunction kplus)
+	@Override
+	protected final void computeSum(MatrixBlock result, KahanFunction kplus)
 	{
 		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
 		
@@ -502,37 +471,93 @@ public class ColGroupRLE extends ColGroupBitmap
 		result.quickSetValue(0, 1, kbuff._correction);
 	}
 
-	private void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
+	@Override
+	protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
 	{
 		KahanObject kbuff = new KahanObject(0, 0);
+		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+		
 		final int numVals = getNumValues();
 		double[] c = result.getDenseBlock();
 		
-		for (int k = 0; k < numVals; k++) {
-			int boff = _ptr[k];
-			int blen = len(k);
-			double val = sumValues(k);
+		if( ALLOW_CACHE_CONSCIOUS_ROWSUMS 
+			&& LOW_LEVEL_OPT && numVals > 1 
+			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
+		{
+			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ/2; 
+			
+			//step 1: prepare position and value arrays
+			
+			//current pos / values per RLE list
+			int[] astart = new int[numVals];
+			int[] apos = skipScan(numVals, rl, astart);
+			double[] aval = sumAllValues(kplus, kbuff);
+			
+			//step 2: cache conscious matrix-vector via horizontal scans 
+			for( int bi=rl; bi<ru; bi+=blksz ) 
+			{
+				int bimax = Math.min(bi+blksz, ru);
+					
+				//horizontal segment scan, incl pos maintenance
+				for (int k = 0; k < numVals; k++) {
+					int boff = _ptr[k];
+					int blen = len(k);
+					double val = aval[k];
+					int bix = apos[k];
+					int start = astart[k];
+					
+					//compute partial results, not aligned
+					while( bix<blen ) {
+						int lstart = _data[boff + bix];
+						int llen = _data[boff + bix + 1];
+						int from = Math.max(bi, start+lstart);
+						int to = Math.min(start+lstart+llen,bimax);
+						for (int rix=from; rix<to; rix++) {
+							kbuff.set(c[2*rix], c[2*rix+1]);
+							kplus2.execute2(kbuff, val);
+							c[2*rix] = kbuff._sum;
+							c[2*rix+1] = kbuff._correction;
+						}
+						if(start+lstart+llen >= bimax)
+							break;
+						start += lstart + llen;
+						bix += 2;
+					}
 					
-			if (val != 0.0) {
-				Pair<Integer,Integer> tmp = skipScanVal(k, rl);
-				int bix = tmp.getKey();
-				int curRunStartOff = tmp.getValue();
-				int curRunEnd = tmp.getValue();
-				for ( ; bix<blen && curRunEnd<ru; bix+=2) {
-					curRunStartOff = curRunEnd + _data[boff+bix];
-					curRunEnd = curRunStartOff + _data[boff+bix+1];
-					for (int rix=curRunStartOff; rix<curRunEnd && rix<ru; rix++) {
-						kbuff.set(c[2*rix], c[2*rix+1]);
-						kplus.execute2(kbuff, val);
-						c[2*rix] = kbuff._sum;
-						c[2*rix+1] = kbuff._correction;
+					apos[k] = bix;	
+					astart[k] = start;
+				}
+			}
+		}
+		else
+		{
+			for (int k = 0; k < numVals; k++) {
+				int boff = _ptr[k];
+				int blen = len(k);
+				double val = sumValues(k, kplus, kbuff);
+						
+				if (val != 0.0) {
+					Pair<Integer,Integer> tmp = skipScanVal(k, rl);
+					int bix = tmp.getKey();
+					int curRunStartOff = tmp.getValue();
+					int curRunEnd = tmp.getValue();
+					for ( ; bix<blen && curRunEnd<ru; bix+=2) {
+						curRunStartOff = curRunEnd + _data[boff+bix];
+						curRunEnd = curRunStartOff + _data[boff+bix+1];
+						for (int rix=curRunStartOff; rix<curRunEnd && rix<ru; rix++) {
+							kbuff.set(c[2*rix], c[2*rix+1]);
+							kplus2.execute2(kbuff, val);
+							c[2*rix] = kbuff._sum;
+							c[2*rix+1] = kbuff._correction;
+						}
 					}
 				}
 			}
 		}
 	}
 
-	private void computeColSums(MatrixBlock result, KahanFunction kplus)
+	@Override
+	protected final void computeColSums(MatrixBlock result, KahanFunction kplus)
 	{
 		KahanObject kbuff = new KahanObject(0, 0);
 		
@@ -561,7 +586,8 @@ public class ColGroupRLE extends ColGroupBitmap
 		}
 	}
 
-	private void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
+	@Override
+	protected final void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
 	{
 		//NOTE: zeros handled once for all column groups outside
 		final int numVals = getNumValues();
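
Note on the cache-conscious computeRowSums above: RLE runs are stored per value
as (start-delta, run-length) char pairs, and row partial sums are accumulated
into the interleaved [sum, correction] result columns via Kahan-compensated
addition (KahanObject/KahanPlus). A simplified standalone model of that inner
loop, using the textbook Kahan convention (the sign of the correction term may
differ from KahanObject's internal representation):

	//Sketch: add `val` to the row sums of all rows covered by the RLE runs of
	//one value, with Kahan compensation against round-off in long sums.
	static void addRunsToRowSums(char[] data, int boff, int blen, double val,
		double[] c /*interleaved [sum,correction] per row*/, int rl, int ru)
	{
		int start = 0;
		for( int bix=0; bix<blen; bix+=2 ) {
			int runStart = start + data[boff+bix];    //first char: start-offset delta
			int runEnd = runStart + data[boff+bix+1]; //second char: run length
			for( int rix=Math.max(runStart,rl); rix<Math.min(runEnd,ru); rix++ ) {
				double sum = c[2*rix], corr = c[2*rix+1];
				double y = val - corr;                //re-apply previously lost low-order bits
				double t = sum + y;
				c[2*rix+1] = (t - sum) - y;           //new correction term
				c[2*rix] = t;
			}
			start = runEnd;
		}
	}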

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index 9d06bf8..6445c52 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -32,6 +32,7 @@ import org.apache.sysml.runtime.functionobjects.ReduceRow;
 import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock.Type;
 import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
 import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
 import org.apache.sysml.runtime.util.SortUtils;
@@ -53,7 +54,7 @@ public class ColGroupUncompressed extends ColGroup
 	private MatrixBlock _data;
 
 	public ColGroupUncompressed() {
-		super(CompressionType.UNCOMPRESSED, (int[])null, -1);
+		super((int[])null, -1);
 	}
 	
 	/**
@@ -71,7 +72,7 @@ public class ColGroupUncompressed extends ColGroup
 	public ColGroupUncompressed(List<Integer> colIndicesList, MatrixBlock rawblock) 
 		throws DMLRuntimeException 
 	{
-		super(CompressionType.UNCOMPRESSED, colIndicesList, 
+		super(colIndicesList, 
 				CompressedMatrixBlock.TRANSPOSE_INPUT ? 
 				rawblock.getNumColumns() : rawblock.getNumRows());
 
@@ -97,7 +98,7 @@ public class ColGroupUncompressed extends ColGroup
 			return;
 		}
 		
-		// dense implementation for dense and sparse matrices to avoid linear search
+		//dense implementation for dense and sparse matrices to avoid linear search
 		int m = numRows;
 		int n = _colIndexes.length;
 		for( int i = 0; i < m; i++) {
@@ -109,6 +110,11 @@ public class ColGroupUncompressed extends ColGroup
 			}
 		}
 		_data.examSparsity();
+		
+		//convert sparse MCSR to read-optimized CSR representation
+		if( _data.isInSparseFormat() ) {
+			_data = new MatrixBlock(_data, Type.CSR, false);
+		}
 	}
 
 	/**
@@ -121,8 +127,7 @@ public class ColGroupUncompressed extends ColGroup
 	 */
 	public ColGroupUncompressed(ArrayList<ColGroup> groupsToDecompress) 
 	{
-		super(CompressionType.UNCOMPRESSED, 
-				mergeColIndices(groupsToDecompress),
+		super(mergeColIndices(groupsToDecompress),
 				groupsToDecompress.get(0)._numRows);
 
 		// Invert the list of column indices
@@ -152,10 +157,14 @@ public class ColGroupUncompressed extends ColGroup
 	 */
 	public ColGroupUncompressed(int[] colIndices, int numRows, MatrixBlock data) 
 	{
-		super(CompressionType.UNCOMPRESSED, colIndices, numRows);
+		super(colIndices, numRows);
 		_data = data;
 	}
 
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.UNCOMPRESSED;
+	}
 
 	/**
 	 * Access for superclass
@@ -276,6 +285,23 @@ public class ColGroupUncompressed extends ColGroup
 		LibMatrixMult.matrixMult(_data, shortVector, result, rl, ru);	
 	}
 	
+	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int k)
+			throws DMLRuntimeException 
+	{
+		// Pull out the relevant rows of the vector
+		int clen = _colIndexes.length;
+		
+		MatrixBlock shortVector = new MatrixBlock(clen, 1, false);
+		shortVector.allocateDenseBlock();
+		double[] b = shortVector.getDenseBlock();
+		for (int colIx = 0; colIx < clen; colIx++)
+			b[colIx] = vector.quickGetValue(_colIndexes[colIx], 0);
+		shortVector.recomputeNonZeros();
+		
+		// Multiply the selected columns by the appropriate parts of the vector
+		LibMatrixMult.matrixMult(_data, shortVector, result, k);	
+	}
+	
 	@Override
 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
 			throws DMLRuntimeException 
@@ -377,8 +403,7 @@ public class ColGroupUncompressed extends ColGroup
 	}
 	
 	@Override
-	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru)
-	{
+	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
 		for( int i=rl; i<ru; i++ )
 			rnnz[i-rl] += _data.recomputeNonZeros(i, i, 0, _data.getNumColumns()-1);
 	}
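
Note on the MCSR-to-CSR conversion added in the constructor above: converting
the sparse uncompressed group to CSR trades per-row modifiable structures for
three flat arrays, which is more compact and faster to scan for the read-only
use inside a compressed matrix. An illustrative CSR construction (not
SystemML's SparseBlockCSR API):

	//Illustrative only: rowPtr[i]..rowPtr[i+1] delimits the nonzeros of row i;
	//colIdx/vals store column indices and values contiguously across all rows.
	public class CsrSketch {
		final int[] rowPtr; final int[] colIdx; final double[] vals;
		
		public CsrSketch(double[][] dense) {
			int m = dense.length, n = dense[0].length, nnz = 0;
			for( double[] row : dense )
				for( double v : row )
					if( v != 0 ) nnz++;
			rowPtr = new int[m+1];
			colIdx = new int[nnz];
			vals = new double[nnz];
			int pos = 0;
			for( int i=0; i<m; i++ ) {
				rowPtr[i] = pos;
				for( int j=0; j<n; j++ )
					if( dense[i][j] != 0 ) {
						colIdx[pos] = j;
						vals[pos++] = dense[i][j];
					}
			}
			rowPtr[m] = pos;
		}
	}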

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
new file mode 100644
index 0000000..b3b5e80
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.Builtin;
+import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+
+/**
+ * Base class for column groups encoded with value dictionary.
+ * 
+ */
+public abstract class ColGroupValue extends ColGroup 
+{	
+	private static final long serialVersionUID = 3786247536054353658L;
+		
+	public static boolean LOW_LEVEL_OPT = true;	
+	
+	//sorting values by physical length helps by 10-20%, especially for serial execution,
+	//but slightly degrades parallel (incl. multi-threaded) execution; hence it is not applied
+	//for distributed operations (also because compression time and garbage collection increase)
+	public static final boolean SORT_VALUES_BY_LENGTH = true; 
+		
+	
+	/** Distinct values associated with individual bitmaps. */
+	protected double[] _values; //linearized <numcol vals> <numcol vals>
+	
+	public ColGroupValue() {
+		super((int[]) null, -1);
+	}
+	
+	/**
+	 * Main constructor. Stores the headers for the individual bitmaps.
+	 * 
+	 * @param colIndices
+	 *            indices (within the block) of the columns included in this
+	 *            column group
+	 * @param numRows
+	 *            total number of rows in the parent block
+	 * @param ubm
+	 *            Uncompressed bitmap representation of the block
+	 */
+	public ColGroupValue(int[] colIndices, int numRows, UncompressedBitmap ubm) 
+	{
+		super(colIndices, numRows);
+
+		// sort values by frequency, if requested 
+		if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH 
+				&& numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) {
+			ubm.sortValuesByFrequency();
+		}
+
+		// extract and store distinct values (bitmaps handled by subclasses)
+		_values = ubm.getValues();
+	}
+
+	/**
+	 * Constructor for subclass methods that need to create shallow copies
+	 * 
+	 * @param colIndices
+	 *            raw column index information
+	 * @param numRows
+	 *            number of rows in the block
+	 * @param values
+	 *            set of distinct values for the block (associated bitmaps are
+	 *            kept in the subclass)
+	 */
+	protected ColGroupValue(int[] colIndices, int numRows, double[] values) {
+		super(colIndices, numRows);
+		_values = values;
+	}
+	
+	@Override
+	public long estimateInMemorySize() {
+		long size = super.estimateInMemorySize();
+		
+		// adding the size of values
+		size += 8; //array reference
+		if (_values != null) {
+			size += 32 + _values.length * 8; //values
+		}
+	
+		return size;
+	}
+
+	/**
+	 * Obtain number of distinct sets of values associated with the bitmaps in this column group.
+	 * 
+	 * @return the number of distinct sets of values associated with the bitmaps
+	 *         in this column group
+	 */
+	public int getNumValues() {
+		return _values.length / _colIndexes.length;
+	}
+
+	public double[] getValues() {
+		return _values;
+	}
+	
+	protected int containsAllZeroValue() {
+		int numVals = getNumValues();
+		int numCols = getNumCols();
+		for( int i=0, off=0; i<numVals; i++, off+=numCols ) {
+			boolean allZeros = true;
+			for( int j=0; j<numCols; j++ )
+				allZeros &= (_values[off+j] == 0);
+			if( allZeros )
+				return i;
+		}
+		return -1;
+	}
+	
+	protected final double sumValues(int valIx) {
+		final int numCols = getNumCols();
+		final int valOff = valIx * numCols;
+		double val = 0.0;
+		for( int i = 0; i < numCols; i++ ) {
+			val += _values[valOff+i];
+		}
+		
+		return val;
+	}
+	
+	protected final double sumValues(int valIx, KahanFunction kplus, KahanObject kbuff) {
+		final int numCols = getNumCols();
+		final int valOff = valIx * numCols;
+		kbuff.set(0, 0);
+		for( int i = 0; i < numCols; i++ ) {
+			kplus.execute2(kbuff, _values[valOff+i]);
+		}
+		
+		return kbuff._sum;
+	}
+	
+	protected final double[] sumAllValues(KahanFunction kplus, KahanObject kbuff) {
+		//quick path: sum 
+		if( getNumCols()==1 && kplus instanceof KahanPlus )
+			return _values; //shallow copy of values
+		
+		//pre-aggregate value tuple 
+		final int numVals = getNumValues();
+		double[] ret = new double[numVals];
+		for( int k=0; k<numVals; k++ )
+			ret[k] = sumValues(k, kplus, kbuff);
+		
+		return ret;
+	}
+	
+	protected final double sumValues(int valIx, double[] b) {
+		final int numCols = getNumCols();
+		final int valOff = valIx * numCols;
+		double val = 0;
+		for( int i = 0; i < numCols; i++ ) {
+			val += _values[valOff+i] * b[i];
+		}
+		
+		return val;
+	}
+
+	protected final double[] preaggValues(int numVals, double[] b) {
+		double[] ret = new double[numVals];
+		for( int k = 0; k < numVals; k++ )
+			ret[k] = sumValues(k, b);
+		
+		return ret;
+	}
+	
+	/**
+	 * NOTE: Shared across OLE/RLE/DDC because this is a value-only computation. 
+	 * 
+	 * @param result output matrix block
+	 * @param builtin function object
+	 * @param zeros indicator if column group contains zero values
+	 */
+	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) 
+	{
+		//init and 0-value handling
+		double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1);
+		if( zeros )
+			val = builtin.execute2(val, 0);
+		
+		//iterate over all values only
+		final int numVals = getNumValues();
+		final int numCols = getNumCols();		
+		for (int k = 0; k < numVals; k++)
+			for( int j=0, valOff = k*numCols; j<numCols; j++ )
+				val = builtin.execute2(val, _values[ valOff+j ]);
+		
+		//compute new partial aggregate
+		val = builtin.execute2(val, result.quickGetValue(0, 0));
+		result.quickSetValue(0, 0, val);
+	}
+	
+	/**
+	 * NOTE: Shared across OLE/RLE/DDC because this is a value-only computation. 
+	 * 
+	 * @param result output matrix block
+	 * @param builtin function object
+	 * @param zeros indicator if column group contains zero values
+	 */
+	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros)
+	{
+		final int numVals = getNumValues();
+		final int numCols = getNumCols();
+		
+		//init and 0-value handling
+		double[] vals = new double[numCols];
+		Arrays.fill(vals, Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1));
+		if( zeros ) {
+			for( int j = 0; j < numCols; j++ )
+				vals[j] = builtin.execute2(vals[j], 0);		
+		}
+		
+		//iterate over all values only
+		for (int k = 0; k < numVals; k++) 
+			for( int j=0, valOff=k*numCols; j<numCols; j++ )
+				vals[j] = builtin.execute2(vals[j], _values[ valOff+j ]);
+		
+		//copy results to output
+		for( int j=0; j<numCols; j++ )
+			result.quickSetValue(0, _colIndexes[j], vals[j]);
+	}
+	
+	/**
+	 * Method for use by subclasses. Applies a scalar operation to the value
+	 * metadata stored in the superclass.
+	 * 
+	 * @param op
+	 *            scalar operation to perform
+	 * @return transformed copy of value metadata for this column group
+	 * @throws DMLRuntimeException if DMLRuntimeException occurs
+	 */
+	protected double[] applyScalarOp(ScalarOperator op)
+		throws DMLRuntimeException 
+	{
+		//scan over linearized values
+		double[] ret = new double[_values.length];
+		for (int i = 0; i < _values.length; i++) {
+			ret[i] = op.executeScalar(_values[i]);
+		}
+
+		return ret;
+	}
+
+	protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
+		throws DMLRuntimeException 
+	{
+		//scan over linearized values
+		double[] ret = new double[_values.length + numCols];
+		for( int i = 0; i < _values.length; i++ ) {
+			ret[i] = op.executeScalar(_values[i]);
+		}
+		
+		//add new value to the end
+		Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
+		
+		return ret;
+	}
+	
+	@Override
+	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) 
+		throws DMLRuntimeException 
+	{
+		unaryAggregateOperations(op, result, 0, getNumRows());
+	}
+	
+	/**
+	 * 
+	 * @param op aggregation operator
+	 * @param result output matrix block
+	 * @param rl row lower index, inclusive
+	 * @param ru row upper index, exclusive
+	 * @throws DMLRuntimeException if DMLRuntimeException occurs
+	 */
+	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru)
+		throws DMLRuntimeException;
+}
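
In the new ColGroupValue base class, _values stores the distinct value tuples back-to-back
as <numCols vals><numCols vals>..., and helpers such as sumValues/preaggValues reduce each
tuple against the slice of the input vector that matches the group's columns. A minimal
standalone sketch of that layout with plain Java arrays (hypothetical class and method
names, not SystemML's API):

public class ValueDictionarySketch {
	// values: linearized dictionary of numVals tuples, each of length numCols
	// b:      slice of the right-hand-side vector for this group's columns
	static double[] preaggValues(double[] values, int numCols, double[] b) {
		int numVals = values.length / numCols;   // number of distinct value tuples
		double[] ret = new double[numVals];
		for (int k = 0; k < numVals; k++) {
			double val = 0;
			for (int j = 0, off = k * numCols; j < numCols; j++)
				val += values[off + j] * b[j];   // dot product of tuple k with b
			ret[k] = val;
		}
		return ret;
	}

	public static void main(String[] args) {
		// two columns, two distinct tuples (1,2) and (3,4); b = (10, 100)
		double[] pre = preaggValues(new double[]{1, 2, 3, 4}, 2, new double[]{10, 100});
		System.out.println(pre[0] + " " + pre[1]); // 210.0 430.0
	}
}

The OLE/RLE/DDC subclasses then scatter these per-tuple pre-aggregates to the rows that
reference each tuple, which is what keeps matrix-vector products cheap on the compressed
representation.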

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index 48ebcc5..84c4812 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -46,6 +46,7 @@ import org.apache.sysml.lops.MMTSJ.MMTSJType;
 import org.apache.sysml.lops.MapMultChain.ChainType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.ColGroup.CompressionType;
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder;
 import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
 import org.apache.sysml.runtime.compress.estim.SizeEstimatorFactory;
@@ -56,12 +57,14 @@ import org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
 import org.apache.sysml.runtime.functionobjects.Builtin;
 import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
 import org.apache.sysml.runtime.functionobjects.KahanPlus;
 import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysml.runtime.functionobjects.Multiply;
 import org.apache.sysml.runtime.functionobjects.ReduceAll;
 import org.apache.sysml.runtime.functionobjects.ReduceCol;
 import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.data.CTableMap;
 import org.apache.sysml.runtime.matrix.data.LibMatrixBincell;
@@ -98,7 +101,9 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	public static final boolean MATERIALIZE_ZEROS = false;
 	public static final long MIN_PAR_AGG_THRESHOLD = 16*1024*1024; //16MB
 	public static final boolean INVESTIGATE_ESTIMATES = false;
-	private static final boolean LDEBUG = false; //local debug flag
+	public static boolean ALLOW_DDC_ENCODING = true;
+	private static final boolean LDEBUG = true; //local debug flag
+	private static final Level LDEBUG_LEVEL = Level.DEBUG; //DEBUG/TRACE for details
 	
 	private static final Log LOG = LogFactory.getLog(CompressedMatrixBlock.class.getName());
 	
@@ -106,7 +111,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		// for internal debugging only
 		if( LDEBUG ) {
 			Logger.getLogger("org.apache.sysml.runtime.compress")
-				  .setLevel((Level) Level.DEBUG);
+				  .setLevel((Level) LDEBUG_LEVEL);
 		}	
 	}
 	
@@ -231,7 +236,6 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		final int numRows = getNumRows();
 		final int numCols = getNumColumns();
 		final boolean sparse = isInSparseFormat();
-		final double sp = OptimizerUtils.getSparsity(numRows, numCols, getNonZeros());
 		MatrixBlock rawblock = !TRANSPOSE_INPUT ? new MatrixBlock(this) :
 			LibMatrixReorg.transpose(this, new MatrixBlock(numCols, numRows, sparse), k);
 		
@@ -239,45 +243,50 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		CompressedSizeEstimator bitmapSizeEstimator = 
 				SizeEstimatorFactory.getSizeEstimator(rawblock, numRows);
 
-		// The current implementation of this method is written for correctness,
-		// not for performance or for minimal use of temporary space.
-
-		// We start with a full set of columns.
-		HashSet<Integer> remainingCols = new HashSet<Integer>();
-		for (int i = 0; i < numCols; i++)
-			remainingCols.add(i);
-
 		// PHASE 1: Classify columns by compression type
-		// We start by determining which columns are amenable to bitmap compression
-		double uncompressedColumnSize = getUncompressedSize(numRows, 1, sp);
-
-		// information about the bitmap amenable columns
-		List<Integer> bitmapCols = new ArrayList<Integer>();
-		List<Integer> uncompressedCols = new ArrayList<Integer>();
-		List<Integer> colsCards = new ArrayList<Integer>();
-		List<Long> compressedSizes = new ArrayList<Long>();
-		HashMap<Integer, Double> compressionRatios = new HashMap<Integer, Double>();
+		// We start by determining which columns are amenable to compression
+		List<Integer> colsC = new ArrayList<Integer>();
+		List<Integer> colsUC = new ArrayList<Integer>();
+		HashMap<Integer, Double> compRatios = new HashMap<Integer, Double>();
 		
-		// Classify columns according to ration (size uncompressed / size compressed), 
+		// Classify columns according to ratio (size uncompressed / size compressed), 
 		// where a column is compressible if ratio > 1.
 		CompressedSizeInfo[] sizeInfos = (k > 1) ?
 				computeCompressedSizeInfos(bitmapSizeEstimator, numCols, k) : 
-				computeCompressedSizeInfos(bitmapSizeEstimator, numCols);		
+				computeCompressedSizeInfos(bitmapSizeEstimator, numCols);	
+		long nnzUC = 0;		
 		for (int col = 0; col < numCols; col++)  {	
-			long compressedSize = sizeInfos[col].getMinSize();
-			double compRatio = uncompressedColumnSize / compressedSize;			
-			if (compRatio > 1) {
-				bitmapCols.add(col);
-				compressionRatios.put(col, compRatio);
-				colsCards.add(sizeInfos[col].getEstCarinality());
-				compressedSizes.add(compressedSize);
+			double uncompSize = getUncompressedSize(numRows, 1, 
+				OptimizerUtils.getSparsity(numRows, 1, sizeInfos[col].getEstNnz()));
+			double compRatio = uncompSize / sizeInfos[col].getMinSize();			
+			if( compRatio > 1 ) {
+				colsC.add(col);
+				compRatios.put(col, compRatio);
+			}
+			else {
+				colsUC.add(col); 
+				nnzUC += sizeInfos[col].getEstNnz();
 			}
-			else
-				uncompressedCols.add(col);
 		}
-
-		_stats.timePhase1 = time.stop();
+		
+		// correction of column classification (reevaluate dense estimates if necessary)
+		boolean sparseUC = MatrixBlock.evalSparseFormatInMemory(numRows, colsUC.size(), nnzUC);
+		if( !sparseUC && !colsUC.isEmpty() ) {
+			for( int i=0; i<colsUC.size(); i++ ) {
+				int col = colsUC.get(i);
+				double uncompSize = getUncompressedSize(numRows, 1, 1.0);
+				double compRatio = uncompSize / sizeInfos[col].getMinSize();			
+				if( compRatio > 1 ) {
+					colsC.add(col);
+					colsUC.remove(i); i--;
+					compRatios.put(col, compRatio);
+					nnzUC -= sizeInfos[col].getEstNnz();
+				}
+			}
+		}
+		
 		if( LOG.isDebugEnabled() ) {
+			_stats.timePhase1 = time.stop();
 			LOG.debug("Compression statistics:");
 			LOG.debug("--compression phase 1: "+_stats.timePhase1);
 		}
@@ -285,26 +294,28 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		// PHASE 2: Grouping columns
 		// Divide the bitmap columns into column groups.
 		List<int[]> bitmapColGrps = PlanningCoCoder.findCocodesByPartitioning(
-				bitmapSizeEstimator, bitmapCols, colsCards, compressedSizes, numRows, 
-				isInSparseFormat() ? sp : 1, k);
+				bitmapSizeEstimator, colsC, sizeInfos, numRows, k);
 
-		_stats.timePhase2 = time.stop();
-		if( LOG.isDebugEnabled() )
+		if( LOG.isDebugEnabled() ) {
+			_stats.timePhase2 = time.stop();
 			LOG.debug("--compression phase 2: "+_stats.timePhase2);
-		
+		}
+			
 		if( INVESTIGATE_ESTIMATES ) {
 			double est = 0;
 			for( int[] groupIndices : bitmapColGrps )
 				est += bitmapSizeEstimator.estimateCompressedColGroupSize(groupIndices).getMinSize();
-			est += uncompressedCols.size() * uncompressedColumnSize;
+			est += MatrixBlock.estimateSizeInMemory(numRows, colsUC.size(), 
+					OptimizerUtils.getSparsity(numRows, colsUC.size(), nnzUC));
 			_stats.estSize = est;
 		}
 		
 		// PHASE 3: Compress and correct sample-based decisions
 		ColGroup[] colGroups = (k > 1) ?
-				compressColGroups(rawblock, bitmapSizeEstimator, compressionRatios, numRows, sp, bitmapColGrps, k) : 
-				compressColGroups(rawblock, bitmapSizeEstimator, compressionRatios, numRows, sp, bitmapColGrps); 	
+				compressColGroups(rawblock, bitmapSizeEstimator, compRatios, numRows, bitmapColGrps, colsUC.isEmpty(), k) : 
+				compressColGroups(rawblock, bitmapSizeEstimator, compRatios, numRows, bitmapColGrps, colsUC.isEmpty()); 	
 		allocateColGroupList();
+		HashSet<Integer> remainingCols = seq(0, numCols-1, 1);
 		for( int j=0; j<colGroups.length; j++ ) {
 			if( colGroups[j] != null ) {
 				for( int col : colGroups[j].getColIndices() )
@@ -313,10 +324,11 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			}
 		}
 		
-		_stats.timePhase3 = time.stop();
-		if( LOG.isDebugEnabled() )
+		if( LOG.isDebugEnabled() ) {
+			_stats.timePhase3 = time.stop();
 			LOG.debug("--compression phase 3: "+_stats.timePhase3);
-		
+		}
+			
 		// Phase 4: Cleanup
 		// The remaining columns are stored uncompressed as one big column group
 		if( !remainingCols.isEmpty() ) {
@@ -332,10 +344,15 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		rawblock.cleanupBlock(true, true);
 		this.cleanupBlock(true, true);
 		
-		_stats.timePhase4 = time.stop();
 		if( LOG.isDebugEnabled() ) {
+			_stats.timePhase4 = time.stop();
+			int[] counts = getColGroupCounts(_colGroups);
 			LOG.debug("--compression phase 4: "+_stats.timePhase4);
 			LOG.debug("--num col groups: "+_colGroups.size());
+			LOG.debug("--col groups types (OLE,RLE,DDC1,DDC2,UC): "
+					+counts[2]+","+counts[1]+","+counts[3]+","+counts[4]+","+counts[0]);
+			LOG.debug("--col groups sizes (OLE,RLE,DDC1,DDC2,UC): "
+					+counts[7]+","+counts[6]+","+counts[8]+","+counts[9]+","+counts[5]);
 			LOG.debug("--compressed size: "+_stats.size);
 			LOG.debug("--compression ratio: "+_stats.ratio);
 		}
@@ -345,6 +362,22 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		return _stats;
 	}
 
+	/**
+	 * Get an array of counts per column group type. The position
+	 * corresponds to the enum ordinal. 
+	 * 
+	 * @param colgroups list of column groups
+	 * @return counts 
+	 */
+	private static int[] getColGroupCounts(ArrayList<ColGroup> colgroups) {
+		int[] ret = new int[10]; //5 x count, 5 x num_columns
+		for( ColGroup c : colgroups ) {
+			ret[c.getCompType().ordinal()] ++;
+			ret[5+c.getCompType().ordinal()] += c.getNumCols();
+		}
+		return ret;
+	}
+	
 	private static CompressedSizeInfo[] computeCompressedSizeInfos(CompressedSizeEstimator estim, int clen) {
 		CompressedSizeInfo[] ret = new CompressedSizeInfo[clen];
 		for( int col=0; col<clen; col++ )
@@ -372,23 +405,23 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		}
 	}
 
-	private static ColGroup[] compressColGroups(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, double sp, List<int[]> groups)
+	private static ColGroup[] compressColGroups(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, List<int[]> groups, boolean denseEst)
 	{
 		ColGroup[] ret = new ColGroup[groups.size()];
 		for( int i=0; i<groups.size(); i++ )
-			ret[i] = compressColGroup(in, estim, compRatios, rlen, sp, groups.get(i));
+			ret[i] = compressColGroup(in, estim, compRatios, rlen, groups.get(i), denseEst);
 		
 		return ret;
 	}
 
-	private static ColGroup[] compressColGroups(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, double sp, List<int[]> groups, int k) 
+	private static ColGroup[] compressColGroups(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, List<int[]> groups, boolean denseEst, int k) 
 		throws DMLRuntimeException
 	{
 		try {
 			ExecutorService pool = Executors.newFixedThreadPool( k );
 			ArrayList<CompressTask> tasks = new ArrayList<CompressTask>();
 			for( int[] colIndexes : groups )
-				tasks.add(new CompressTask(in, estim, compRatios, rlen, sp, colIndexes));
+				tasks.add(new CompressTask(in, estim, compRatios, rlen, colIndexes, denseEst));
 			List<Future<ColGroup>> rtask = pool.invokeAll(tasks);	
 			ArrayList<ColGroup> ret = new ArrayList<ColGroup>();
 			for( Future<ColGroup> lrtask : rtask )
@@ -401,7 +434,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		}
 	}
 
-	private static ColGroup compressColGroup(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, double sp, int[] colIndexes) 
+	private static ColGroup compressColGroup(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, int[] colIndexes, boolean denseEst) 
 	{
 		int[] allGroupIndices = null;
 		int allColsCount = colIndexes.length;
@@ -416,12 +449,13 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			//exact big list and observe compression ratio
 			ubm = BitmapEncoder.extractBitmap(colIndexes, in); 
 			sizeInfo = estim.estimateCompressedColGroupSize(ubm);	
-			double compRatio = getUncompressedSize(rlen, colIndexes.length, sp) / sizeInfo.getMinSize();
-			
+			double sp2 = denseEst ? 1.0 : OptimizerUtils.getSparsity(rlen, 1, ubm.getNumOffsets());
+			double compRatio = getUncompressedSize(rlen, colIndexes.length, sp2) / sizeInfo.getMinSize();
+		
 			if( compRatio > 1 ) {
 				break; // we have a good group
 			} 
-			
+
 			// modify the group
 			if (compRatioPQ == null) {
 				// first modification
@@ -454,9 +488,17 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		//create compressed column group
 		long rleSize = sizeInfo.getRLESize();
 		long oleSize = sizeInfo.getOLESize();
-		if( rleSize < oleSize )
+		long ddcSize = sizeInfo.getDDCSize();
+		
+		if( ALLOW_DDC_ENCODING && ddcSize < rleSize && ddcSize < oleSize ) {
+			if( ubm.getNumValues()<=255 )
+				return new ColGroupDDC1(colIndexes, rlen, ubm);
+			else
+				return new ColGroupDDC2(colIndexes, rlen, ubm);	
+		}
+		else if( rleSize < oleSize )
 			return new ColGroupRLE(colIndexes, rlen, ubm);
-		else
+		else 
 			return new ColGroupOLE(colIndexes, rlen, ubm);
 	}
 	
@@ -469,10 +511,11 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	 * @return estimate of uncompressed size of column group
 	 */
 	private static double getUncompressedSize(int rlen, int clen, double sparsity) {
-		//we estimate the uncompressed size as 8 * nnz in order to cover both
-		//sparse and dense with moderate underestimation (which is conservative as 
-		//it is biased towards uncompressed columns)
-		return 8 * rlen * clen * sparsity;
+		//we estimate the uncompressed size as the minimum of the dense representation
+		//and the CSR representation, which moderately overestimates sparse single-column
+		//representations but avoids anomalies with sparse columns that eventually
+		//end up in a dense representation
+		return Math.min(8d * rlen * clen, 4d * rlen + 12d * rlen * clen * sparsity);
 	}
 
 	/**
@@ -587,8 +630,8 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	}
 
 	private static class CompressedColumn implements Comparable<CompressedColumn> {
-		int colIx;
-		double compRatio;
+		final int colIx;
+		final double compRatio;
 
 		public CompressedColumn(int colIx, double compRatio) {
 			this.colIx = colIx;
@@ -613,6 +656,13 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		public CompressionStatistics() {
 			//do nothing
 		}
+		
+		public CompressionStatistics(double t1, double t2, double t3, double t4){
+			timePhase1 = t1;
+			timePhase2 = t2;
+			timePhase3 = t3;
+			timePhase4 = t4;
+		}
 	} 
 
 	@Override
@@ -681,6 +731,10 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 					grp = new ColGroupOLE(); break;
 				case RLE_BITMAP:
 					grp = new ColGroupRLE(); break;
+				case DDC1:
+					grp = new ColGroupDDC1(); break;
+				case DDC2:
+					grp = new ColGroupDDC2(); break;	
 			}
 			
 			//deserialize and add column group
@@ -1040,11 +1094,22 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 				
 				//aggregate partial results
 				if( op.indexFn instanceof ReduceAll ) {
-					double val = ret.quickGetValue(0, 0);
-					for( Future<MatrixBlock> rtask : rtasks )
-						val = op.aggOp.increOp.fn.execute(val, 
-								rtask.get().quickGetValue(0, 0));
-					ret.quickSetValue(0, 0, val);
+					if( op.aggOp.increOp.fn instanceof KahanFunction ) {
+						KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
+						for( Future<MatrixBlock> rtask : rtasks ) {
+							double tmp = rtask.get().quickGetValue(0, 0);
+							((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
+						}
+						ret.quickSetValue(0, 0, kbuff._sum);
+					}	
+					else {
+						double val = ret.quickGetValue(0, 0);
+						for( Future<MatrixBlock> rtask : rtasks ) {
+							double tmp = rtask.get().quickGetValue(0, 0);
+							val = op.aggOp.increOp.fn.execute(val, tmp);
+						}
+						ret.quickSetValue(0, 0, val);
+					}
 				}		
 			}
 			catch(Exception ex) {
@@ -1058,9 +1123,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 					grp.unaryAggregateOperations(op, ret);
 			
 			//process OLE/RLE column groups
-			for (ColGroup grp : _colGroups)
-				if( !(grp instanceof ColGroupUncompressed) )
-					grp.unaryAggregateOperations(op, ret);
+			aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
 		}
 		
 		//special handling zeros for rowmins/rowmax
@@ -1089,6 +1152,41 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	}
 	
 	@Override
+	public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op,
+			MatrixValue result, int blockingFactorRow, int blockingFactorCol,
+			MatrixIndexes indexesIn) throws DMLRuntimeException {
+		return aggregateUnaryOperations(op, result, 
+				blockingFactorRow, blockingFactorCol, indexesIn, false);
+	}
+	
+	private static void aggregateUnaryOperations(AggregateUnaryOperator op, 
+			ArrayList<ColGroup> groups, MatrixBlock ret, int rl, int ru) throws DMLRuntimeException 
+	{
+		boolean cacheDDC1 = ColGroupValue.LOW_LEVEL_OPT 
+				&& op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof KahanPlus //rowSums
+				&& ColGroupOffset.ALLOW_CACHE_CONSCIOUS_ROWSUMS
+				&& ru-rl > ColGroupOffset.WRITE_CACHE_BLKSZ/2;
+			
+		//process cache-conscious DDC1 groups (adds to output)
+		if( cacheDDC1 ) {
+			ArrayList<ColGroupDDC1> tmp = new ArrayList<ColGroupDDC1>();
+			for( ColGroup grp : groups )
+				if( grp instanceof ColGroupDDC1 )
+					tmp.add((ColGroupDDC1)grp);
+			if( !tmp.isEmpty() )
+				ColGroupDDC1.computeRowSums(tmp.toArray(new ColGroupDDC1[0]), ret,
+						KahanPlus.getKahanPlusFnObject(), rl, ru);
+		}
+			
+		//process remaining groups (adds to output)
+		//note: UC group never passed into this function
+		for( ColGroup grp : groups )
+			if( !(grp instanceof ColGroupUncompressed) 
+				&& !(cacheDDC1 && grp instanceof ColGroupDDC1) )
+				((ColGroupValue)grp).unaryAggregateOperations(op, ret, rl, ru);
+	}
+	
+	@Override
 	public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype) 
 		throws DMLRuntimeException 
 	{
@@ -1204,12 +1302,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		result.allocateDenseBlock();
 
 		// delegate matrix-vector operation to each column group
-		for( ColGroup grp : _colGroups )
-			if( grp instanceof ColGroupUncompressed ) //overwrites output
-				grp.rightMultByVector(vector, result, 0, result.getNumRows());
-		for( ColGroup grp : _colGroups )
-			if( !(grp instanceof ColGroupUncompressed) ) //adds to output
-				grp.rightMultByVector(vector, result, 0, result.getNumRows());
+		rightMultByVector(_colGroups, vector, result, true, 0, result.getNumRows());
 		
 		// post-processing
 		result.recomputeNonZeros();
@@ -1231,6 +1324,13 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 
 		//multi-threaded execution of all groups
 		try {
+			ColGroupUncompressed uc = getUncompressedColGroup();
+			
+			//compute uncompressed column group in parallel 
+			if( uc != null )
+				uc.rightMultByVector(vector, result, k);					
+			
+			//compute remaining compressed column groups in parallel
 			ExecutorService pool = Executors.newFixedThreadPool( k );
 			int rlen = getNumRows();
 			int seqsz = BitmapEncoder.BITMAP_BLOCK_SZ;
@@ -1239,15 +1339,48 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			ArrayList<RightMatrixMultTask> tasks = new ArrayList<RightMatrixMultTask>();
 			for( int i=0; i<k & i*blklen<getNumRows(); i++ )
 				tasks.add(new RightMatrixMultTask(_colGroups, vector, result, i*blklen, Math.min((i+1)*blklen,rlen)));
-			pool.invokeAll(tasks);	
+			List<Future<Long>> ret = pool.invokeAll(tasks);	
 			pool.shutdown();
+			
+			//error handling and nnz aggregation
+			long lnnz = 0;
+			for( Future<Long> tmp : ret )
+				lnnz += tmp.get(); 
+			result.setNonZeros(lnnz);
 		}
 		catch(Exception ex) {
 			throw new DMLRuntimeException(ex);
 		}
+	}
+	
+	private static void rightMultByVector(ArrayList<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, boolean inclUC, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		boolean cacheDDC1 = ColGroupValue.LOW_LEVEL_OPT 
+			&& ru-rl > ColGroupOffset.WRITE_CACHE_BLKSZ;
 		
-		// post-processing
-		result.recomputeNonZeros();
+		// process uncompressed column group (overwrites output)
+		if( inclUC ) {
+			for( ColGroup grp : groups )
+				if( grp instanceof ColGroupUncompressed )
+					grp.rightMultByVector(vect, ret, rl, ru);
+		}
+		
+		//process cache-conscious DDC1 groups (adds to output)
+		if( cacheDDC1 ) {
+			ArrayList<ColGroupDDC1> tmp = new ArrayList<ColGroupDDC1>();
+			for( ColGroup grp : groups )
+				if( grp instanceof ColGroupDDC1 )
+					tmp.add((ColGroupDDC1)grp);
+			if( !tmp.isEmpty() )
+				ColGroupDDC1.rightMultByVector(tmp.toArray(new ColGroupDDC1[0]), vect, ret, rl, ru);
+		}
+		
+		//process remaining groups (adds to output)
+		for( ColGroup grp : groups )
+			if( !(grp instanceof ColGroupUncompressed) 
+				&& !(cacheDDC1 && grp instanceof ColGroupDDC1) )
+				grp.rightMultByVector(vect, ret, rl, ru);
 	}
 	
 	/**
@@ -1299,11 +1432,9 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	 * @param k number of threads
 	 * @throws DMLRuntimeException if DMLRuntimeException occurs
 	 */
-	private static void leftMultByVectorTranspose(List<ColGroup> colGroups,MatrixBlock vector, MatrixBlock result, boolean doTranspose, int k) 
+	private void leftMultByVectorTranspose(List<ColGroup> colGroups,MatrixBlock vector, MatrixBlock result, boolean doTranspose, int k) 
 		throws DMLRuntimeException 
 	{
-		int kuc = Math.max(1, k - colGroups.size() + 1);
-		
 		//transpose vector if required
 		MatrixBlock rowVector = vector;
 		if (doTranspose) {
@@ -1317,12 +1448,21 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 
 		//multi-threaded execution
 		try {
-			ExecutorService pool = Executors.newFixedThreadPool( Math.min(colGroups.size(), k) );
+			//compute uncompressed column group in parallel 
+			ColGroupUncompressed uc = getUncompressedColGroup();
+			if( uc != null )
+				uc.leftMultByRowVector(vector, result, k);					
+			
+			//compute remaining compressed column groups in parallel
+			ExecutorService pool = Executors.newFixedThreadPool( Math.min(colGroups.size()-((uc!=null)?1:0), k) );
 			ArrayList<LeftMatrixMultTask> tasks = new ArrayList<LeftMatrixMultTask>();
 			for( ColGroup grp : colGroups )
-				tasks.add(new LeftMatrixMultTask(grp, rowVector, result, kuc));
-			pool.invokeAll(tasks);	
+				if( !(grp instanceof ColGroupUncompressed) )
+					tasks.add(new LeftMatrixMultTask(grp, rowVector, result));
+			List<Future<Object>> ret = pool.invokeAll(tasks);	
 			pool.shutdown();
+			for( Future<Object> tmp : ret )
+				tmp.get(); //error handling
 		}
 		catch(Exception ex) {
 			throw new DMLRuntimeException(ex);
@@ -1405,37 +1545,32 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 
 	private static class LeftMatrixMultTask implements Callable<Object> 
 	{
-		private ColGroup _group = null;
-		private MatrixBlock _vect = null;
-		private MatrixBlock _ret = null;
-		private int _kuc = 1;
+		private final ColGroup _group;
+		private final MatrixBlock _vect;
+		private final MatrixBlock _ret;
 		
-		protected LeftMatrixMultTask( ColGroup group, MatrixBlock vect, MatrixBlock ret, int kuc)  {
+		protected LeftMatrixMultTask( ColGroup group, MatrixBlock vect, MatrixBlock ret)  {
 			_group = group;
 			_vect = vect;
 			_ret = ret;
-			_kuc = kuc;
 		}
 		
 		@Override
 		public Object call() throws DMLRuntimeException 
 		{
 			// delegate matrix-vector operation to each column group
-			if( _group instanceof ColGroupUncompressed && _kuc >1 && ColGroupBitmap.LOW_LEVEL_OPT )
-				((ColGroupUncompressed)_group).leftMultByRowVector(_vect, _ret, _kuc);
-			else
-				_group.leftMultByRowVector(_vect, _ret);
+			_group.leftMultByRowVector(_vect, _ret);
 			return null;
 		}
 	}
 
-	private static class RightMatrixMultTask implements Callable<Object> 
+	private static class RightMatrixMultTask implements Callable<Long> 
 	{
-		private ArrayList<ColGroup> _groups = null;
-		private MatrixBlock _vect = null;
-		private MatrixBlock _ret = null;
-		private int _rl = -1;
-		private int _ru = -1;
+		private final ArrayList<ColGroup> _groups;
+		private final MatrixBlock _vect;
+		private final MatrixBlock _ret;
+		private final int _rl;
+		private final int _ru;
 		
 		protected RightMatrixMultTask( ArrayList<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, int rl, int ru)  {
 			_groups = groups;
@@ -1446,25 +1581,18 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		}
 		
 		@Override
-		public Object call() throws DMLRuntimeException 
-		{
-			// delegate vector-matrix operation to each column group
-			for( ColGroup grp : _groups )
-				if( grp instanceof ColGroupUncompressed ) //overwrites output
-					grp.rightMultByVector(_vect, _ret, _rl, _ru);
-			for( ColGroup grp : _groups )
-				if( !(grp instanceof ColGroupUncompressed) ) //adds to output
-					grp.rightMultByVector(_vect, _ret, _rl, _ru);
-			return null;
+		public Long call() throws DMLRuntimeException {
+			rightMultByVector(_groups, _vect, _ret, false, _rl, _ru);
+			return _ret.recomputeNonZeros(_rl, _ru-1, 0, 0);
 		}
 	}
 	
 	private static class MatrixMultTransposeTask implements Callable<Object> 
 	{
-		private ArrayList<ColGroup> _groups = null;
-		private MatrixBlock _ret = null;
-		private int _gl = -1;
-		private int _gu = -1;
+		private final ArrayList<ColGroup> _groups;
+		private final MatrixBlock _ret;
+		private final int _gl;
+		private final int _gu;
 		
 		protected MatrixMultTransposeTask(ArrayList<ColGroup> groups, MatrixBlock ret, int gl, int gu)  {
 			_groups = groups;
@@ -1482,11 +1610,11 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 	
 	private static class UnaryAggregateTask implements Callable<MatrixBlock> 
 	{
-		private ArrayList<ColGroup> _groups = null;
-		private int _rl = -1;
-		private int _ru = -1;
-		private MatrixBlock _ret = null;
-		private AggregateUnaryOperator _op = null;
+		private final ArrayList<ColGroup> _groups;
+		private final int _rl;
+		private final int _ru;
+		private final MatrixBlock _ret;
+		private final AggregateUnaryOperator _op;
 		
 		protected UnaryAggregateTask( ArrayList<ColGroup> groups, MatrixBlock ret, int rl, int ru, AggregateUnaryOperator op)  {
 			_groups = groups;
@@ -1507,18 +1635,15 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		
 		@Override
 		public MatrixBlock call() throws DMLRuntimeException {
-			// delegate unary aggregate operation to each column group
-			// (uncompressed column group handles separately)
-			for( ColGroup grp : _groups )
-				((ColGroupBitmap)grp).unaryAggregateOperations(_op, _ret, _rl, _ru);
+			aggregateUnaryOperations(_op, _groups, _ret, _rl, _ru);
 			return _ret;
 		}
 	}
 
 	private static class SizeEstimTask implements Callable<CompressedSizeInfo> 
 	{
-		private CompressedSizeEstimator _estim = null;
-		private int _col = -1;
+		private final CompressedSizeEstimator _estim;
+		private final int _col;
 		
 		protected SizeEstimTask( CompressedSizeEstimator estim, int col )  {
 			_estim = estim;
@@ -1533,34 +1658,34 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 
 	private static class CompressTask implements Callable<ColGroup> 
 	{
-		private MatrixBlock _in = null;
-		private CompressedSizeEstimator _estim = null;
-		private HashMap<Integer, Double> _compRatios = null;
-		private int _rlen = -1;
-		private double _sp = -1;
-		private int[] _colIndexes = null;
-		
-		protected CompressTask( MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, double sp, int[] colIndexes )  {
+		private final MatrixBlock _in;
+		private final CompressedSizeEstimator _estim;
+		private final HashMap<Integer, Double> _compRatios;
+		private final int _rlen;
+		private final int[] _colIndexes;
+		private final boolean _denseEst;
+		
+		protected CompressTask( MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, int[] colIndexes, boolean denseEst )  {
 			_in = in;
 			_estim = estim;
 			_compRatios = compRatios;
 			_rlen = rlen;
-			_sp = sp;
 			_colIndexes = colIndexes;
+			_denseEst = denseEst;
 		}
 		
 		@Override
 		public ColGroup call() throws DMLRuntimeException {
-			return compressColGroup(_in, _estim, _compRatios, _rlen, _sp, _colIndexes);
+			return compressColGroup(_in, _estim, _compRatios, _rlen, _colIndexes, _denseEst);
 		}
 	}
 	
 	private static class DecompressTask implements Callable<Object> 
 	{
-		private List<ColGroup> _colGroups = null;
-		private MatrixBlock _ret = null;
-		private int _rl = -1;
-		private int _ru = -1;
+		private final List<ColGroup> _colGroups;
+		private final MatrixBlock _ret;
+		private final int _rl;
+		private final int _ru;
 		
 		protected DecompressTask( List<ColGroup> colGroups, MatrixBlock ret, int rl, int ru )  {
 			_colGroups = colGroups;
@@ -1735,15 +1860,6 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 		MatrixBlock tmp = isCompressed() ? decompress() : this;
 		return tmp.zeroOutOperations(result, range, complementary);
 	}
-	
-	@Override
-	public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op,
-			MatrixValue result, int blockingFactorRow, int blockingFactorCol,
-			MatrixIndexes indexesIn) throws DMLRuntimeException {
-		printDecompressWarning("aggregateUnaryOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
-		return tmp.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn);
-	}
 
 	@Override
 	public CM_COV_Object cmOperations(CMOperator op) throws DMLRuntimeException {
@@ -2000,4 +2116,11 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			LOG.warn("Operation '"+operation+"' not supported yet - decompressing for ULA operations.");
 		}
 	}
+	
+	private HashSet<Integer> seq(int from, int to, int incr) {
+		HashSet<Integer> ret = new HashSet<Integer>();
+		for (int i = from; i <= to; i+=incr)
+			ret.add(i);
+		return ret;
+	}
 }
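
Two of the CompressedMatrixBlock changes above are easy to miss in the large diff: the
revised uncompressed-size baseline (minimum of a dense and a CSR-style estimate) and the
extended format selection in compressColGroup that now also considers DDC1/DDC2. A minimal
standalone sketch of both decisions, with hypothetical names and the candidate sizes
treated as given inputs rather than computed via SystemML's estimators:

public class FormatSelectionSketch {
	enum Format { DDC1, DDC2, RLE, OLE }

	// Mirrors the revised getUncompressedSize heuristic: minimum of a dense estimate
	// (8 bytes per cell) and a CSR-style estimate, read here as roughly 4 bytes of row
	// overhead plus 12 bytes per non-zero (an interpretation, not stated in the patch).
	static double uncompressedSize(int rlen, int clen, double sparsity) {
		return Math.min(8d * rlen * clen, 4d * rlen + 12d * rlen * clen * sparsity);
	}

	// Picks the smallest candidate encoding; DDC1 is preferred over DDC2 when the
	// dictionary index fits into a single byte per row (<= 255 distinct tuples).
	static Format chooseFormat(long ddcSize, long rleSize, long oleSize,
	                           int numValues, boolean allowDDC) {
		if (allowDDC && ddcSize < rleSize && ddcSize < oleSize)
			return (numValues <= 255) ? Format.DDC1 : Format.DDC2;
		return (rleSize < oleSize) ? Format.RLE : Format.OLE;
	}

	public static void main(String[] args) {
		// Example: a single column with 1M rows and 1% non-zeros has an uncompressed
		// baseline of min(8.0MB, 4.0MB + 0.12MB) = 4.12MB.
		System.out.println(uncompressedSize(1_000_000, 1, 0.01));
	}
}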

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
deleted file mode 100644
index 70308bb..0000000
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-/**
- * Used for the finding columns to co-code
- * 
- */
-public class PlanningBinPacker 
-{
-	private final float _binWeight;
-	private final List<Integer> _items;
-	private final List<Float> _itemWeights;
-
-	public PlanningBinPacker(float binWeight, List<Integer> items, List<Float> itemWeights) {
-		_binWeight = binWeight;
-		_items = items;
-		_itemWeights = itemWeights;
-	}
-
-	/**
-	 * NOTE: upper bound is 17/10 OPT
-	 * 
-	 * @return key: available space, value: list of the bins that have that free space
-	 */
-	public TreeMap<Float, List<List<Integer>>> packFirstFit() {
-		return packFirstFit(_items, _itemWeights);
-	}
-
-	private TreeMap<Float, List<List<Integer>>> packFirstFit(List<Integer> items, List<Float> itemWeights) 
-	{
-		// when searching for a bin, the first bin in the list is used
-		TreeMap<Float, List<List<Integer>>> bins = new TreeMap<Float, List<List<Integer>>>();
-		// first bin
-		bins.put(_binWeight, createBinList());
-		int numItems = items.size();
-		for (int i = 0; i < numItems; i++) {
-			float itemWeight = itemWeights.get(i);
-			Map.Entry<Float, List<List<Integer>>> entry = bins
-					.ceilingEntry(itemWeight);
-			if (entry == null) {
-				// new bin
-				float newBinWeight = _binWeight - itemWeight;
-				List<List<Integer>> binList = bins.get(newBinWeight);
-				if (binList == null) {
-					bins.put(newBinWeight, createBinList(items.get(i)));
-				} else {
-					List<Integer> newBin = new ArrayList<Integer>();
-					newBin.add(items.get(i));
-					binList.add(newBin);
-				}
-			} else {
-				// add to the first bin in the list
-				List<Integer> assignedBin = entry.getValue().remove(0);
-				assignedBin.add(items.get(i));
-				if (entry.getValue().size() == 0)
-					bins.remove(entry.getKey());
-				float newBinWeight = entry.getKey() - itemWeight;
-				List<List<Integer>> newBinsList = bins.get(newBinWeight);
-				if (newBinsList == null) {
-					// new bin
-					bins.put(newBinWeight, createBinList(assignedBin));
-				} else {
-					newBinsList.add(assignedBin);
-				}
-			}
-		}
-		return bins;
-	}
-
-	private List<List<Integer>> createBinList() {
-		List<List<Integer>> binList = new ArrayList<List<Integer>>();
-		binList.add(new ArrayList<Integer>());
-		return binList;
-	}
-
-	private List<List<Integer>> createBinList(int item) {
-		List<List<Integer>> binList = new ArrayList<List<Integer>>();
-		List<Integer> bin = new ArrayList<Integer>();
-		binList.add(bin);
-		bin.add(item);
-		return binList;
-	}
-
-	private List<List<Integer>> createBinList(List<Integer> bin) {
-		List<List<Integer>> binList = new ArrayList<List<Integer>>();
-		binList.add(bin);
-		return binList;
-	}
-}
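
The removed PlanningBinPacker (presumably relocated to the new cocode package referenced by
the PlanningCoCoder import added above) partitioned the groupable columns with a first-fit
heuristic before the per-bin brute-force grouping. A minimal standalone first-fit sketch
with hypothetical names, kept independent of SystemML's classes:

import java.util.ArrayList;
import java.util.List;

public class FirstFitSketch {
	// Places each item into the first bin with enough remaining capacity, opening a new
	// bin otherwise; classic first-fit needs at most ~1.7x the optimal number of bins.
	static List<List<Integer>> packFirstFit(List<Integer> items, List<Float> weights, float binWeight) {
		List<List<Integer>> bins = new ArrayList<List<Integer>>();
		List<Float> remaining = new ArrayList<Float>();
		for (int i = 0; i < items.size(); i++) {
			float w = weights.get(i);
			int target = -1;
			for (int b = 0; b < bins.size() && target < 0; b++)
				if (remaining.get(b) >= w)
					target = b;                       // first bin that fits
			if (target < 0) {                         // no bin fits: open a new one
				bins.add(new ArrayList<Integer>());
				remaining.add(binWeight);
				target = bins.size() - 1;
			}
			bins.get(target).add(items.get(i));
			remaining.set(target, remaining.get(target) - w);
		}
		return bins;
	}
}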

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
deleted file mode 100644
index 9313cd9..0000000
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.TreeMap;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
-
-public class PlanningCoCoder 
-{
-	//constants for weight computation
-	private final static float GROUPABILITY_THRESHOLD = 0.00064f;
-	private final static float PARTITION_WEIGHT = 0.05F; //higher values lead to more grouping
-	private final static float PARTITION_SIZE = PARTITION_WEIGHT * GROUPABILITY_THRESHOLD;
-
-	public static List<int[]> findCocodesByPartitioning(CompressedSizeEstimator sizeEstimator, List<Integer> availCols, 
-			List<Integer> colsCardinalities, List<Long> compressedSize, int numRows, double sparsity, int k) 
-		throws DMLRuntimeException 
-	{
-		List<int[]> retGroups = new ArrayList<int[]>();
-		
-		// filtering out non-groupable columns as singleton groups
-		// weighted of each column is the ratio of its cardinality to the number
-		// of rows scaled by the matrix sparsity
-		int numCols = availCols.size();
-		List<Integer> groupCols = new ArrayList<Integer>();
-		List<Float> groupColWeights = new ArrayList<Float>();
-		HashMap<Integer, GroupableColInfo> groupColsInfo = new HashMap<Integer, GroupableColInfo>();
-		for (int i = 0; i < numCols; i++) {
-			int colIx = availCols.get(i);
-			int cardinality = colsCardinalities.get(i);
-			float weight = ((float) cardinality) / numRows;
-			if (weight <= GROUPABILITY_THRESHOLD) {
-				groupCols.add(colIx);
-				groupColWeights.add(weight);
-				groupColsInfo.put(colIx, new GroupableColInfo(weight,compressedSize.get(i)));
-			} else {
-				retGroups.add(new int[] { colIx });
-			}
-		}
-		
-		// bin packing based on PARTITION_WEIGHT and column weights
-		float weight = computeWeightForCoCoding(numRows, sparsity);
-		TreeMap<Float, List<List<Integer>>> bins = new PlanningBinPacker(
-				weight, groupCols, groupColWeights).packFirstFit();
-
-		// brute force grouping within each partition
-		retGroups.addAll( (k > 1) ?
-				getCocodingGroupsBruteForce(bins, groupColsInfo, sizeEstimator, numRows, k) :
-				getCocodingGroupsBruteForce(bins, groupColsInfo, sizeEstimator, numRows));
-			
-		return retGroups;
-	}
-
-	private static List<int[]> getCocodingGroupsBruteForce(TreeMap<Float, List<List<Integer>>> bins, HashMap<Integer, GroupableColInfo> groupColsInfo, CompressedSizeEstimator estim, int rlen) 
-	{
-		List<int[]> retGroups = new ArrayList<int[]>();		
-		for (List<List<Integer>> binList : bins.values()) {
-			for (List<Integer> bin : binList) {
-				// building an array of singleton CoCodingGroup
-				ArrayList<PlanningCoCodingGroup> sgroups = new ArrayList<PlanningCoCodingGroup>();
-				for (Integer col : bin)
-					sgroups.add(new PlanningCoCodingGroup(col, groupColsInfo.get(col)));
-				// brute force co-coding	
-				PlanningCoCodingGroup[] outputGroups = findCocodesBruteForce(
-						estim, rlen, sgroups.toArray(new PlanningCoCodingGroup[0]));
-				for (PlanningCoCodingGroup grp : outputGroups)
-					retGroups.add(grp.getColIndices());
-			}
-		}
-		
-		return retGroups;
-	}
-
-	private static List<int[]> getCocodingGroupsBruteForce(TreeMap<Float, List<List<Integer>>> bins, HashMap<Integer, GroupableColInfo> groupColsInfo, CompressedSizeEstimator estim, int rlen, int k) 
-		throws DMLRuntimeException 
-	{
-		List<int[]> retGroups = new ArrayList<int[]>();		
-		try {
-			ExecutorService pool = Executors.newFixedThreadPool( k );
-			ArrayList<CocodeTask> tasks = new ArrayList<CocodeTask>();
-			for (List<List<Integer>> binList : bins.values())
-				for (List<Integer> bin : binList) {
-					// building an array of singleton CoCodingGroup
-					ArrayList<PlanningCoCodingGroup> sgroups = new ArrayList<PlanningCoCodingGroup>();
-					for (Integer col : bin)
-						sgroups.add(new PlanningCoCodingGroup(col, groupColsInfo.get(col)));
-					tasks.add(new CocodeTask(estim, sgroups, rlen));
-				}
-			List<Future<PlanningCoCodingGroup[]>> rtask = pool.invokeAll(tasks);	
-			for( Future<PlanningCoCodingGroup[]> lrtask : rtask )
-				for (PlanningCoCodingGroup grp : lrtask.get())
-					retGroups.add(grp.getColIndices());
-			pool.shutdown();
-		}
-		catch(Exception ex) {
-			throw new DMLRuntimeException(ex);
-		}
-		
-		return retGroups;
-	}
-
-	/**
-	 * Identify columns to code together. Uses a greedy approach that merges
-	 * pairs of column groups into larger groups. Each phase of the greedy
-	 * algorithm considers all combinations of pairs to merge.
-	 * 
-	 * @param sizeEstimator compressed size estimator
-	 * @param numRowsWeight number of rows weight
-	 * @param singltonGroups planning co-coding groups
-	 * @return
-	 */
-	private static PlanningCoCodingGroup[] findCocodesBruteForce(
-			CompressedSizeEstimator sizeEstimator, float numRowsWeight,
-			PlanningCoCodingGroup[] singltonGroups) 
-	{
-		// Populate a priority queue with all available 2-column cocodings.
-		PriorityQueue<PlanningGroupMergeAction> q = new PriorityQueue<PlanningGroupMergeAction>();
-		for (int leftIx = 0; leftIx < singltonGroups.length; leftIx++) {
-			PlanningCoCodingGroup leftGrp = singltonGroups[leftIx];
-			for (int rightIx = leftIx + 1; rightIx < singltonGroups.length; rightIx++) {
-				PlanningCoCodingGroup rightGrp = singltonGroups[rightIx];
-				// at least one of the two groups should be low-cardinality
-				float cardRatio = leftGrp.getCardinalityRatio() + rightGrp.getCardinalityRatio(); 
-				if ( cardRatio < GROUPABILITY_THRESHOLD) {
-					PlanningGroupMergeAction potentialMerge = new PlanningGroupMergeAction(
-							sizeEstimator, numRowsWeight, leftGrp, rightGrp);
-					if (potentialMerge.getChangeInSize() < 0) {
-						q.add(potentialMerge);
-					}
-				}
-			}
-		}
-		PlanningCoCodingGroup[] colGroups = singltonGroups;
-		
-		// Greedily merge groups until we can no longer reduce the number of
-		// runs by merging groups
-		while (q.size() > 0) {
-			PlanningGroupMergeAction merge = q.poll();
-
-			// The queue can contain merge actions involving column groups that
-			// have already been merged.
-			// Filter those actions out.
-			int leftIx = findInArray(colGroups, merge.getLeftGrp());
-			int rightIx = findInArray(colGroups, merge.getRightGrp());
-			if (leftIx < 0 || rightIx < 0) {
-				// One or more of the groups to be merged has already been made
-				// part of another group.
-				// Drop the merge action.
-			} else {
-				PlanningCoCodingGroup mergedGrp = merge.getMergedGrp();
-
-				PlanningCoCodingGroup[] newColGroups = new PlanningCoCodingGroup[colGroups.length - 1];
-				int targetIx = 0;
-				for (int i = 0; i < colGroups.length; i++) {
-					if (i != leftIx && i != rightIx) {
-						newColGroups[targetIx] = colGroups[i];
-						targetIx++;
-					}
-				}
-
-				// New group goes at the end to (hopefully) speed up future
-				// linear search operations
-				newColGroups[newColGroups.length - 1] = mergedGrp;
-
-				// Consider merging the new group with all the other
-				// pre-existing groups.
-				for (int i = 0; i < newColGroups.length - 1; i++) {
-					PlanningCoCodingGroup newLeftGrp = newColGroups[i];
-					PlanningCoCodingGroup newRightGrp = mergedGrp;
-					if (newLeftGrp.getCardinalityRatio()
-							+ newRightGrp.getCardinalityRatio() < GROUPABILITY_THRESHOLD) {
-						PlanningGroupMergeAction newPotentialMerge = new PlanningGroupMergeAction(
-								sizeEstimator, numRowsWeight, newLeftGrp,
-								newRightGrp);
-						if (newPotentialMerge.getChangeInSize() < 0) {
-							q.add(newPotentialMerge);
-						}
-					}
-				}
-				colGroups = newColGroups;
-			}
-		}
-		return colGroups;
-	}
-
-	private static float computeWeightForCoCoding(int numRows, double sparsity) {
-		//we use a constant partition size (independent of the number of rows
-		//in order to ensure constant compression speed independent of blocking)
-		return PARTITION_SIZE;
-	}
-
-	private static int findInArray(Object[] arr, Object val) {
-		for (int i = 0; i < arr.length; i++) {
-			if (arr[i].equals(val)) {
-				return i;
-			}
-		}
-		return -1;
-	}
-
-	protected static class GroupableColInfo {
-		float cardRatio;
-		long size;
-
-		public GroupableColInfo(float lcardRatio, long lsize) {
-			cardRatio = lcardRatio;
-			size = lsize;
-		}
-	}
-
-	private static class CocodeTask implements Callable<PlanningCoCodingGroup[]> 
-	{
-		private CompressedSizeEstimator _estim = null;
-		private ArrayList<PlanningCoCodingGroup> _sgroups = null;
-		private int _rlen = -1;
-		
-		protected CocodeTask( CompressedSizeEstimator estim, ArrayList<PlanningCoCodingGroup> sgroups, int rlen )  {
-			_estim = estim;
-			_sgroups = sgroups;
-			_rlen = rlen;
-		}
-		
-		@Override
-		public PlanningCoCodingGroup[] call() throws DMLRuntimeException {
-			// brute force co-coding	
-			return findCocodesBruteForce(_estim, _rlen, 
-					_sgroups.toArray(new PlanningCoCodingGroup[0]));
-		}
-	}
-}
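
The removed PlanningCoCoder implemented the greedy pairwise merging described in its
comments: within each bin, keep merging the pair of column groups whose combined encoding
is estimated to be smaller than the two separate encodings, until no merge reduces the
total estimated size. A minimal standalone sketch of that greedy loop, with hypothetical
names, a caller-supplied size estimator, and without the cardinality pre-filter and
priority-queue bookkeeping of the original:

import java.util.ArrayList;
import java.util.List;
import java.util.function.ToLongFunction;

public class GreedyCoCodeSketch {
	// Greedily merges column groups while some pairwise merge reduces the estimated size.
	static List<List<Integer>> coCode(List<List<Integer>> groups,
			ToLongFunction<List<Integer>> estSize) {
		List<List<Integer>> cur = new ArrayList<List<Integer>>(groups);
		boolean merged = true;
		while (merged) {
			merged = false;
			long bestDelta = 0;
			int bi = -1, bj = -1;
			for (int i = 0; i < cur.size(); i++)
				for (int j = i + 1; j < cur.size(); j++) {
					List<Integer> cand = new ArrayList<Integer>(cur.get(i));
					cand.addAll(cur.get(j));
					long delta = estSize.applyAsLong(cand)
						- estSize.applyAsLong(cur.get(i)) - estSize.applyAsLong(cur.get(j));
					if (delta < bestDelta) { bestDelta = delta; bi = i; bj = j; }
				}
			if (bi >= 0) {                            // apply the best size-reducing merge
				List<Integer> mergedGrp = new ArrayList<Integer>(cur.get(bi));
				mergedGrp.addAll(cur.get(bj));
				cur.remove(bj);                       // remove j first (j > i)
				cur.remove(bi);
				cur.add(mergedGrp);
				merged = true;
			}
		}
		return cur;
	}
}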

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
deleted file mode 100644
index 9ee0d7e..0000000
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.compress.PlanningCoCoder.GroupableColInfo;
-import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
-import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
-
-/** 
- * Class to represent information about co-coding a group of columns. 
- * 
- */
-public class PlanningCoCodingGroup 
-{
-	private int[] _colIndexes;
-	private long _estSize;
-	private float _cardRatio;
-
-	/**
-	 * Constructor for a one-column group; i.e. do not co-code a given column.
-	 * 
-	 * @param col column
-	 * @param info groupable column info
-	 */
-	public PlanningCoCodingGroup(int col, GroupableColInfo info) {
-		_colIndexes = new int[]{col};
-		_estSize = info.size;
-		_cardRatio = info.cardRatio;
-	}
-
-	/**
-	 * Constructor for merging two disjoint groups of columns
-	 * 
-	 * @param grp1   first group of columns to merge
-	 * @param grp2   second group to merge
-	 * @param bitmapSizeEstimator bitmap size estimator
-	 * @param numRowsWeight numRows x sparsity
-	 */
-	public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2,
-			CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight) 
-	{
-		// merge sorted non-empty arrays
-		_colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length];		
-		int grp1Ptr = 0, grp2Ptr = 0;
-		for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) {
-			if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) {
-				_colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++];
-				if (grp1Ptr == grp1._colIndexes.length) {
-					System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes,
-							mergedIx + 1, grp2._colIndexes.length - grp2Ptr);
-					break;
-				}
-			} else {
-				_colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++];
-				if (grp2Ptr == grp2._colIndexes.length) {
-					System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes,
-							mergedIx + 1, grp1._colIndexes.length - grp1Ptr);
-					break;
-				}
-			}
-		}
-		
-		// estimating size info
-		CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator
-				.estimateCompressedColGroupSize(_colIndexes);
-		_estSize = groupSizeInfo.getMinSize();
-		_cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight;
-	}
-
-	public int[] getColIndices() {
-		return _colIndexes;
-	}
-
-	/**
-	 * Obtain estimated compressed size of the grouped columns.
-	 * 
-	 * @return estimated compressed size of the grouped columns
-	 */
-	public long getEstSize() {
-		return _estSize;
-	}
-
-	public float getCardinalityRatio() {
-		return _cardRatio;
-	}
-
-	@Override
-	public String toString() {
-		return Arrays.toString(_colIndexes);
-	}
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
deleted file mode 100644
index 47d46d5..0000000
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
-
-/**
- * Internal data structure for tracking potential merges of column groups in
- * co-coding calculations.
- * 
- */
-class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction> 
-{
-	private PlanningCoCodingGroup _leftGrp;   //left input
-	private PlanningCoCodingGroup _rightGrp;  //right input
-	private PlanningCoCodingGroup _mergedGrp; //output
-	private long _changeInSize;
-
-	
-	public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator,
-			float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) {
-		_leftGrp = leftGrp;
-		_rightGrp = rightGrp;
-		_mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight);
-
-		// Negative size change ==> Decrease in size
-		_changeInSize = _mergedGrp.getEstSize() 
-				- leftGrp.getEstSize() - rightGrp.getEstSize();
-	}
-
-	public int compareTo(PlanningGroupMergeAction o) {
-		// We only sort by the change in size
-		return (int) Math.signum(_changeInSize - o._changeInSize);
-	}
-
-	@Override
-	public String toString() {
-		return String.format("Merge %s and %s", _leftGrp, _rightGrp);
-	}
-
-	public PlanningCoCodingGroup getLeftGrp() {
-		return _leftGrp;
-	}
-
-	public PlanningCoCodingGroup getRightGrp() {
-		return _rightGrp;
-	}
-
-	public PlanningCoCodingGroup getMergedGrp() {
-		return _mergedGrp;
-	}
-
-	public long getChangeInSize() {
-		return _changeInSize;
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
index 63c0467..60d0532 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
@@ -63,7 +63,6 @@ public class ReaderColumnSelectionSparse extends ReaderColumnSelection
 		if( data.getSparseBlock()!=null )
 		for( int i=0; i<colIndexes.length; i++ )
 			sparseCols[i] = data.getSparseBlock().get(colIndexes[i]);
-		Arrays.fill(sparsePos, 0);
 	}
 
 	@Override

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
index d62bae9..2f68edf 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
@@ -21,10 +21,13 @@ package org.apache.sysml.runtime.compress;
 
 import java.util.Arrays;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
 import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
 import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry;
 import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
+import org.apache.sysml.runtime.compress.utils.IntArrayList;
+import org.apache.sysml.runtime.util.SortUtils;
 
 /** 
  * Uncompressed representation of one or more columns in bitmap format. 
@@ -32,13 +35,13 @@ import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
  */
 public final class UncompressedBitmap 
 {
-	private int _numCols;
+	private final int _numCols;
 
 	/** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/
 	private double[] _values;
 
 	/** Bitmaps (as lists of offsets) for each of the values. */
-	private int[][] _offsetsLists;
+	private IntArrayList[] _offsetsLists;
 
 	public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns ) 
 	{
@@ -46,11 +49,11 @@ public final class UncompressedBitmap
 		// Convert inputs to arrays
 		int numVals = distinctVals.size();
 		_values = new double[numVals*numColumns];
-		_offsetsLists = new int[numVals][];
+		_offsetsLists = new IntArrayList[numVals];
 		int bitmapIx = 0;
 		for( DArrayIListEntry val : distinctVals.extractValues()) {
 			System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns);
-			_offsetsLists[bitmapIx++] = val.value.extractValues();
+			_offsetsLists[bitmapIx++] = val.value;
 		}
 		_numCols = numColumns;
 	}
@@ -61,11 +64,11 @@ public final class UncompressedBitmap
 		// Convert inputs to arrays
 		int numVals = distinctVals.size();
 		_values = new double[numVals];
-		_offsetsLists = new int[numVals][];
+		_offsetsLists = new IntArrayList[numVals];
 		int bitmapIx = 0;
 		for(DIListEntry val : distinctVals.extractValues()) {
 			_values[bitmapIx] = val.key;
-			_offsetsLists[bitmapIx++] = val.value.extractValues();
+			_offsetsLists[bitmapIx++] = val.value;
 		}
 		_numCols = 1;
 	}
@@ -74,6 +77,15 @@ public final class UncompressedBitmap
 		return _numCols;
 	}
 	
+	/** 
+	 * Get all values without unnecessary allocations and copies.
+	 * 
+	 * @return dictionary of value tuples
+	 */
+	public double[] getValues() {
+		return _values;
+	}
+	
 	/**
 	 * Obtain tuple of column values associated with index.
 	 * 
@@ -94,21 +106,46 @@ public final class UncompressedBitmap
 		return _values.length / _numCols;
 	}
 
-	/**
-	 * Obtain array of offsets of the rows containing index value
-	 * 
-	 * @param ix   index of a particular distinct value
-	 * @return IMMUTABLE array of the offsets of the rows containing the value
-	 *         with the indicated index
-	 */
-	public int[] getOffsetsList(int ix) {
+	public IntArrayList getOffsetsList(int ix) {
 		return _offsetsLists[ix];
 	}
 
-	public int getNumOffsets() {
-		int ret = 0;
-		for( int[] offlist : _offsetsLists )
-			ret += offlist.length;
+	public long getNumOffsets() {
+		long ret = 0;
+		for( IntArrayList offlist : _offsetsLists )
+			ret += offlist.size();
 		return ret;
 	}
+	
+	public int getNumOffsets(int ix) {
+		return _offsetsLists[ix].size();
+	}
+	
+	public void sortValuesByFrequency() {
+		int numVals = getNumValues();
+		int numCols = getNumColumns();
+		
+		double[] freq = new double[numVals];
+		int[] pos = new int[numVals];
+		
+		//populate the temporary arrays
+		for(int i=0; i<numVals; i++) {
+			freq[i] = getNumOffsets(i);
+			pos[i] = i;
+		}
+		
+		//sort ascending and reverse (descending)
+		SortUtils.sortByValue(0, numVals, freq, pos);
+		ArrayUtils.reverse(pos);
+		
+		//create new value and offset list arrays
+		double[] lvalues = new double[numVals*numCols];
+		IntArrayList[] loffsets = new IntArrayList[numVals];
+		for(int i=0; i<numVals; i++) {
+			System.arraycopy(_values, pos[i]*numCols, lvalues, i*numCols, numCols);
+			loffsets[i] = _offsetsLists[pos[i]];
+		}
+		_values = lvalues;
+		_offsetsLists = loffsets;
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitioner.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitioner.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitioner.java
new file mode 100644
index 0000000..05af19d
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitioner.java
@@ -0,0 +1,19 @@
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
+
+public abstract class ColumnGroupPartitioner 
+{
+	/**
+	 * Partitions a list of columns into a list of partitions that contain subsets of columns.
+	 * Note that this call must compute a complete and disjoint partitioning.
+	 * 
+	 * @param groupCols list of columns 
+	 * @param groupColsInfo list of column infos
+	 * @return list of partitions (where each partition is a list of columns)
+	 */
+	public abstract List<List<Integer>> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo);
+}
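
For illustration only, a minimal hypothetical partitioner satisfying this
contract could place every column into its own singleton partition. The
sketch below assumes the same package and GroupableColInfo import as the
abstract class above; the class name is made up and this is not the
ColumnGroupPartitionerStatic class shipped in this patch.

package org.apache.sysml.runtime.compress.cocode;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;

//hypothetical example: every column becomes its own singleton partition,
//which is trivially complete and disjoint as required by the contract
public class ColumnGroupPartitionerSingleton extends ColumnGroupPartitioner
{
	@Override
	public List<List<Integer>> partitionColumns(List<Integer> groupCols,
		HashMap<Integer, GroupableColInfo> groupColsInfo)
	{
		List<List<Integer>> ret = new ArrayList<List<Integer>>();
		for( Integer col : groupCols ) {
			List<Integer> part = new ArrayList<Integer>();
			part.add(col);
			ret.add(part);
		}
		return ret;
	}
}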

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
new file mode 100644
index 0000000..0fb6abe
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
+import org.apache.sysml.runtime.util.SortUtils;
+
+/**
+ * Column group partitioning with bin packing heuristic.
+ * 
+ */
+public class ColumnGroupPartitionerBinPacking extends ColumnGroupPartitioner
+{
+	private static final boolean FIRST_FIT_DEC = true;
+	private static final int MAX_COL_PER_GROUP = Integer.MAX_VALUE;
+
+	//we use a constant partition size (independent of the number of rows)
+	//in order to ensure constant compression speed independent of blocking
+	public static double BIN_CAPACITY = 0.000032; //higher values, more grouping
+	
+	@Override
+	public List<List<Integer>> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo) 
+	{
+		//obtain column weights
+		int[] items = new int[groupCols.size()];
+		double[] itemWeights = new double[groupCols.size()];
+		for( int i=0; i<groupCols.size(); i++ ) {
+			int col = groupCols.get(i);
+			items[i] = col;
+			itemWeights[i] = groupColsInfo.get(col).cardRatio;
+		} 
+		
+		//sort items (first fit decreasing)
+		if( FIRST_FIT_DEC ) {
+			SortUtils.sortByValue(0, items.length, itemWeights, items);
+			ArrayUtils.reverse(items);
+			ArrayUtils.reverse(itemWeights);
+		}
+		
+		//partition columns via bin packing
+		return packFirstFit(items, itemWeights);
+	}
+
+	/**
+	 * NOTE: upper bound is 17/10 OPT
+	 * 
+	 * @param items the items in terms of columns
+	 * @param itemWeights the weights of the items
+	 * @return list of bins, where each bin is a list of column indices
+	 */
+	private List<List<Integer>> packFirstFit(int[] items, double[] itemWeights) 
+	{
+		List<List<Integer>> bins = new ArrayList<List<Integer>>();
+		List<Double> binWeights = new ArrayList<Double>(); 
+		
+		for( int i = 0; i < items.length; i++ ) {
+			//add to existing bin
+			boolean assigned = false;
+			for( int j = 0; j < bins.size(); j++ ) {
+				double newBinWeight = binWeights.get(j)-itemWeights[i];
+				if( newBinWeight >= 0 && bins.get(j).size() < MAX_COL_PER_GROUP-1 ){
+					bins.get(j).add(items[i]);
+					binWeights.set(j, newBinWeight);
+					assigned = true; break;
+				}
+			}
+				
+			//create new bin at end of list
+			if( !assigned ) {
+				bins.add(new ArrayList<Integer>(Arrays.asList(items[i])));
+				binWeights.add(BIN_CAPACITY-itemWeights[i]);
+			}		
+		}
+		
+		return bins;
+	}
+}
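
For intuition, the following standalone sketch runs the same first-fit idea
on made-up item weights (hypothetical cardinality ratios, assumed already
sorted in decreasing order); the class name and data are invented, it does
not use the SystemML classes, and it only mirrors the capacity-tracking
logic of packFirstFit above.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FirstFitDecreasingDemo {
	public static void main(String[] args) {
		//mirrors BIN_CAPACITY above; weights are hypothetical
		double capacity = 0.000032;
		double[] weights = { 0.00003, 0.00002, 0.00001, 0.000005 };
		
		List<List<Double>> bins = new ArrayList<List<Double>>();
		List<Double> remaining = new ArrayList<Double>();
		for( double w : weights ) {
			boolean assigned = false;
			for( int j = 0; j < bins.size(); j++ ) {
				//place item into the first bin with enough remaining capacity
				if( remaining.get(j) >= w ) {
					bins.get(j).add(w);
					remaining.set(j, remaining.get(j) - w);
					assigned = true; break;
				}
			}
			//otherwise open a new bin
			if( !assigned ) {
				bins.add(new ArrayList<Double>(Arrays.asList(w)));
				remaining.add(capacity - w);
			}
		}
		//prints [[3.0E-5], [2.0E-5, 1.0E-5], [5.0E-6]], i.e., three groups
		System.out.println(bins);
	}
}

With these weights, the largest item nearly fills the first bin on its own,
the next two items share a second bin, and the smallest item opens a third.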



[5/5] incubator-systemml git commit: [SYSTEMML-449] Compressed linear algebra v2

Posted by mb...@apache.org.
[SYSTEMML-449] Compressed linear algebra v2

This patch bundles various improvements for the experimental feature
'compressed linear algebra'. In detail, this includes the following
extensions:

* [SYSTEMML-820] New column encoding format DDC (dense dictionary
coding) with DDC1 and DDC2 for 1- and 2-byte codes as well as efficient
operations (a simplified sketch of the encoding follows this list).

* [SYSTEMML-815] Hardened sample-based estimators (e.g., uncompressed
size, empty segments, reduced population size, stabilization parameter,
and numerically stable implementations), including an increased sample
fraction and the removal of unnecessary parameters.

* [SYSTEMML-814] Debugging tools for compression plans, compression
tracing, and compression statistics.

* New greedy column grouping algorithm with pruning and memoization.

* New static column partitioning and changed bin packing heuristics.

* Additional operations (e.g., cache-conscious rowSums).

* Various fixes and performance improvements throughout all CLA
components.

* Extended test cases to cover OLE, RLE, DDC, and UC groups as well as
combinations thereof.

* Various internal refactorings to simplify the extension and
maintenance of CLA. 
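
To give a rough intuition for the DDC1 encoding mentioned above, the
following simplified, self-contained sketch encodes a single hypothetical
column as a dictionary of distinct values plus one byte code per row; the
class name and data are made up, and the actual ColGroupDDC1 in this patch
generalizes the idea to multi-column value tuples and adds zero handling,
aggregates, and serialization.

public class Ddc1Sketch {
	public static void main(String[] args) {
		//hypothetical single-column input
		double[] column = { 7.0, 7.0, 3.0, 7.0, 3.0, 0.0 };
		
		//build dictionary of distinct values (at most 256 for 1-byte codes)
		java.util.LinkedHashMap<Double, Integer> dict = new java.util.LinkedHashMap<Double, Integer>();
		byte[] codes = new byte[column.length];
		for( int i = 0; i < column.length; i++ ) {
			Integer code = dict.get(column[i]);
			if( code == null ) {
				code = dict.size();
				dict.put(column[i], code);
			}
			//dense: exactly one dictionary code per row
			codes[i] = (byte) code.intValue();
		}
		double[] values = new double[dict.size()];
		for( java.util.Map.Entry<Double, Integer> e : dict.entrySet() )
			values[e.getValue()] = e.getKey();
		
		//decompression is a plain dictionary lookup per row
		for( int i = 0; i < codes.length; i++ )
			System.out.println(values[codes[i] & 0xFF]);
	}
}

With at most 256 distinct value tuples, one byte per row suffices; DDC2
uses two bytes per row and hence supports up to 65536 distinct tuples.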

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/37a215bc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/37a215bc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/37a215bc

Branch: refs/heads/master
Commit: 37a215bc3be26495c351eae6be4b85eaf22daedc
Parents: 390b81c
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Feb 5 16:22:01 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed Feb 8 03:12:18 2017 +0100

----------------------------------------------------------------------
 .../sysml/runtime/compress/BitmapEncoder.java   |  22 +-
 .../apache/sysml/runtime/compress/ColGroup.java |  39 +-
 .../sysml/runtime/compress/ColGroupBitmap.java  | 580 ------------------
 .../sysml/runtime/compress/ColGroupDDC.java     | 227 +++++++
 .../sysml/runtime/compress/ColGroupDDC1.java    | 358 +++++++++++
 .../sysml/runtime/compress/ColGroupDDC2.java    | 312 ++++++++++
 .../sysml/runtime/compress/ColGroupOLE.java     | 173 +++---
 .../sysml/runtime/compress/ColGroupOffset.java  | 424 +++++++++++++
 .../sysml/runtime/compress/ColGroupRLE.java     | 178 +++---
 .../runtime/compress/ColGroupUncompressed.java  |  41 +-
 .../sysml/runtime/compress/ColGroupValue.java   | 303 ++++++++++
 .../runtime/compress/CompressedMatrixBlock.java | 419 ++++++++-----
 .../runtime/compress/PlanningBinPacker.java     | 112 ----
 .../sysml/runtime/compress/PlanningCoCoder.java | 257 --------
 .../runtime/compress/PlanningCoCodingGroup.java | 110 ----
 .../compress/PlanningGroupMergeAction.java      |  73 ---
 .../compress/ReaderColumnSelectionSparse.java   |   1 -
 .../runtime/compress/UncompressedBitmap.java    |  73 ++-
 .../compress/cocode/ColumnGroupPartitioner.java |  19 +
 .../ColumnGroupPartitionerBinPacking.java       | 100 +++
 .../cocode/ColumnGroupPartitionerStatic.java    |  52 ++
 .../compress/cocode/PlanningCoCoder.java        | 236 ++++++++
 .../compress/cocode/PlanningCoCodingGroup.java  | 175 ++++++
 .../compress/cocode/PlanningMemoTable.java      |  75 +++
 .../compress/estim/CompressedSizeEstimator.java |  47 +-
 .../estim/CompressedSizeEstimatorExact.java     |   5 +-
 .../estim/CompressedSizeEstimatorSample.java    | 605 ++++++++++---------
 .../compress/estim/CompressedSizeInfo.java      |  46 +-
 .../compress/estim/SizeEstimatorFactory.java    |   6 +-
 .../runtime/compress/utils/ConverterUtils.java  |  16 +
 .../runtime/compress/utils/IntArrayList.java    |  13 +-
 .../compress/utils/LinearAlgebraUtils.java      | 164 +++++
 .../compress/BasicCompressionTest.java          |  40 +-
 .../functions/compress/BasicGetValueTest.java   |  40 +-
 .../compress/BasicMatrixAppendTest.java         |  40 +-
 .../compress/BasicMatrixMultChainTest.java      |  76 ++-
 .../BasicMatrixTransposeSelfMultTest.java       |  40 +-
 .../compress/BasicMatrixVectorMultTest.java     |  40 +-
 .../BasicScalarOperationsSparseUnsafeTest.java  |  40 +-
 .../compress/BasicScalarOperationsTest.java     |  40 +-
 .../BasicTransposeSelfLeftMatrixMultTest.java   |  40 +-
 .../compress/BasicUnaryAggregateTest.java       | 326 +++++++---
 .../compress/BasicVectorMatrixMultTest.java     |  40 +-
 .../functions/compress/CompressedLinregCG.java  |   5 +-
 .../compress/CompressedSerializationTest.java   |  40 +-
 .../compress/LargeCompressionTest.java          |  40 +-
 .../compress/LargeMatrixVectorMultTest.java     |  40 +-
 .../compress/LargeParMatrixVectorMultTest.java  |  40 +-
 .../compress/LargeParUnaryAggregateTest.java    | 337 +++++++----
 .../compress/LargeVectorMatrixMultTest.java     |  40 +-
 .../functions/compress/ParCompressionTest.java  |  40 +-
 .../compress/ParMatrixMultChainTest.java        |  66 +-
 .../compress/ParMatrixVectorMultTest.java       |  40 +-
 .../ParTransposeSelfLeftMatrixMultTest.java     |  40 +-
 .../compress/ParUnaryAggregateTest.java         | 327 ++++++----
 .../compress/ParVectorMatrixMultTest.java       |  40 +-
 56 files changed, 4733 insertions(+), 2385 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
index 7fd2c69..b27112f 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
@@ -20,7 +20,6 @@
 package org.apache.sysml.runtime.compress;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 
 import org.apache.sysml.runtime.compress.utils.DblArray;
 import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
@@ -100,8 +99,8 @@ public class BitmapEncoder
 	 *            the offsets of different bits
 	 * @return compressed version of said bitmap
 	 */
-	public static char[] genRLEBitmap(int[] offsets) {
-		if( offsets.length == 0 )
+	public static char[] genRLEBitmap(int[] offsets, int len) {
+		if( len == 0 )
 			return new char[0]; //empty list
 
 		// Use an ArrayList for correctness at the expense of temp space
@@ -139,7 +138,7 @@ public class BitmapEncoder
 		curRunLen = 1;
 
 		// Process the remaining offsets
-		for (int i = 1; i < offsets.length; i++) {
+		for (int i = 1; i < len; i++) {
 
 			int absOffset = offsets[i];
 
@@ -179,9 +178,8 @@ public class BitmapEncoder
 
 		// Convert wasteful ArrayList to packed array.
 		char[] ret = new char[buf.size()];
-		for (int i = 0; i < buf.size(); i++) {
+		for(int i = 0; i < buf.size(); i++ )
 			ret[i] = buf.get(i);
-		}
 		return ret;
 	}
 
@@ -194,21 +192,19 @@ public class BitmapEncoder
 	 *            the offsets of different bits
 	 * @return compressed version of said bitmap
 	 */
-	public static char[] genOffsetBitmap(int[] offsets) 
-	{
-		int lastOffset = offsets[offsets.length - 1];
+	public static char[] genOffsetBitmap(int[] offsets, int len) 
+	{ 
+		int lastOffset = offsets[len - 1];
 
 		// Build up the blocks
 		int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
 		// To simplify the logic, we make two passes.
 		// The first pass divides the offsets by block.
 		int[] blockLengths = new int[numBlocks];
-		Arrays.fill(blockLengths, 0);
 
-		for (int ix = 0; ix < offsets.length; ix++) {
+		for (int ix = 0; ix < len; ix++) {
 			int val = offsets[ix];
 			int blockForVal = val / BITMAP_BLOCK_SZ;
-
 			blockLengths[blockForVal]++;
 		}
 
@@ -238,7 +234,7 @@ public class BitmapEncoder
 
 		return encodedBlocks;
 	}
-
+	
 	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) 
 	{
 		//probe map for distinct items (for value or value groups)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index 586690c..bf1b822 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -40,9 +40,11 @@ public abstract class ColGroup implements Serializable
 	private static final long serialVersionUID = 2439785418908671481L;
 
 	public enum CompressionType  {
-		UNCOMPRESSED,   //uncompressed sparse/dense 
-		RLE_BITMAP,     //RLE bitmap
-		OLE_BITMAP;  //OLE bitmap
+		UNCOMPRESSED, //uncompressed sparse/dense 
+		RLE_BITMAP,  //RLE bitmap
+		OLE_BITMAP,  //OLE bitmap
+		DDC1, //DDC 1 byte
+		DDC2; //DDC 2 byte
 	}
 	
 	/**
@@ -53,23 +55,17 @@ public abstract class ColGroup implements Serializable
 
 	/** Number of rows in the matrix, for use by child classes. */
 	protected int _numRows;
-
-	/** How the elements of the column group are compressed. */
-	private CompressionType _compType;
-
 	
 	/**
 	 * Main constructor.
 	 * 
-	 * @param type compression type
 	 * @param colIndices
 	 *            offsets of the columns in the matrix block that make up the
 	 *            group
 	 * @param numRows
 	 *            total number of rows in the parent block
 	 */
-	protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
-		_compType = type;
+	protected ColGroup(int[] colIndices, int numRows) {
 		_colIndexes = colIndices;
 		_numRows = numRows;
 	}
@@ -77,16 +73,15 @@ public abstract class ColGroup implements Serializable
 	/**
 	 * Convenience constructor for converting indices to a more compact format.
 	 * 
-	 * @param type compression type
 	 * @param colIndicesList list of column indices
 	 * @param numRows total number of rows in the parent block
 	 */
-	protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
-		_compType = type;
+	protected ColGroup(List<Integer> colIndicesList, int numRows) {
 		_colIndexes = new int[colIndicesList.size()];
 		int i = 0;
 		for (Integer index : colIndicesList)
 			_colIndexes[i++] = index;
+		_numRows = numRows;
 	}
 
 	/**
@@ -126,9 +121,7 @@ public abstract class ColGroup implements Serializable
 	 * 
 	 * @return How the elements of the column group are compressed.
 	 */
-	public CompressionType getCompType() {
-		return _compType;
-	}
+	public abstract CompressionType getCompType();
 
 	public void shiftColIndices(int offset)  {
 		for( int i=0; i<_colIndexes.length; i++ )
@@ -143,14 +136,12 @@ public abstract class ColGroup implements Serializable
 	 *         in memory.
 	 */
 	public long estimateInMemorySize() {
-		// int numRows (4B) , array reference colIndices (8B) + array object
-		// overhead if exists (32B) + 4B per element, CompressionType compType
-		// (2 booleans 2B + enum overhead 32B + reference to enum 8B)
-		long size = 54;
-		if (_colIndexes == null)
-			return size;
-		else
-			return size + 32 + 4 * _colIndexes.length;
+	// object header (12B, padded to multiples of 8), int numRows (4B), 
+		// array reference colIndices (8B) 
+		//+ array object overhead if exists (32B) + 4B per element
+		long size = 24;
+		return (_colIndexes == null) ? size : 
+			size + 32 + 4 * _colIndexes.length;
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
deleted file mode 100644
index dac18ef..0000000
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.functionobjects.Builtin;
-import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
-import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
-
-
-/**
- * Base class for column groups encoded with various types of bitmap encoding.
- * 
- * 
- * NOTES:
- *  * OLE: separate storage segment length and bitmaps led to a 30% improvement
- *    but not applied because more difficult to support both data layouts at the
- *    same time (distributed/local as well as w/ and w/o low-level opt)
- */
-public abstract class ColGroupBitmap extends ColGroup 
-{
-	private static final long serialVersionUID = -1635828933479403125L;
-	
-	public static final boolean LOW_LEVEL_OPT = true;	
-	//sorting of values by physical length helps by 10-20%, especially for serial, while
-	//slight performance decrease for parallel incl multi-threaded, hence not applied for
-	//distributed operations (also because compression time + garbage collection increases)
-	private static final boolean SORT_VALUES_BY_LENGTH = true; 
-	protected static final boolean CREATE_SKIPLIST = true;
-	
-	protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	
-	/** Distinct values associated with individual bitmaps. */
-	protected double[] _values; //linearized <numcol vals> <numcol vals>
-
-	/** Bitmaps, one per uncompressed value in {@link #_values}. */
-	protected int[] _ptr; //bitmap offsets per value
-	protected char[] _data; //linearized bitmaps (variable length)
-	protected boolean _zeros; //contains zero values
-	
-	protected int[] _skiplist;
-	
-	public ColGroupBitmap(CompressionType type) {
-		super(type, (int[]) null, -1);
-	}
-	
-	/**
-	 * Main constructor. Stores the headers for the individual bitmaps.
-	 * 
-	 * @param type column type
-	 * @param colIndices
-	 *            indices (within the block) of the columns included in this
-	 *            column
-	 * @param numRows
-	 *            total number of rows in the parent block
-	 * @param ubm
-	 *            Uncompressed bitmap representation of the block
-	 */
-	public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) 
-	{
-		super(type, colIndices, numRows);
-
-		// Extract and store just the distinct values. The bitmaps themselves go
-		// into the subclasses.
-		final int numCols = ubm.getNumColumns();
-		final int numVals = ubm.getNumValues();
-		
-		_values = new double[numVals*numCols];
-		_zeros = (ubm.getNumOffsets() < numRows);
-		
-		for (int i=0; i<numVals; i++) {
-			//note: deep copied internally on getValues
-			double[] tmp = ubm.getValues(i);
-			System.arraycopy(tmp, 0, _values, i*numCols, numCols);
-		}
-	}
-
-	/**
-	 * Constructor for subclass methods that need to create shallow copies
-	 * 
-	 * @param type compression type
-	 * @param colIndices
-	 *            raw column index information
-	 * @param numRows
-	 *            number of rows in the block
-	 * @param zeros ?
-	 * @param values
-	 *            set of distinct values for the block (associated bitmaps are
-	 *            kept in the subclass)
-	 */
-	protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, boolean zeros, double[] values) {
-		super(type, colIndices, numRows);
-		_zeros = zeros;
-		_values = values;
-	}
-	
-	protected final int len(int k) {
-		return _ptr[k+1] - _ptr[k];
-	}
-
-	protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
-	{
-		// compact bitmaps to linearized representation
-		if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
-			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
-		{
-			// sort value by num segments in descending order
-			TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
-			for( int i=0; i<numVals; i++ ) {
-				int revlen = totalLen-lbitmaps[i].length;
-				if( !tree.containsKey(revlen) )
-					tree.put(revlen, new ArrayList<Integer>());
-				tree.get(revlen).add(i);
-			}
-			
-			// compact bitmaps to linearized representation
-			_ptr = new int[numVals+1];
-			_data = new char[totalLen];
-			int pos = 0, off = 0;
-			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
-				for( Integer tmpix : e.getValue() ) {
-					int len = lbitmaps[tmpix].length;
-					_ptr[pos] = off;
-					System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
-					off += len;
-					pos++;
-				}
-			}
-			_ptr[numVals] = totalLen;
-			
-			// reorder values
-			double[] lvalues = new double[_values.length];
-			int off2 = 0; int numCols = _colIndexes.length;
-			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
-				for( Integer tmpix : e.getValue() ) {
-					System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);				
-					off2 += numCols;
-				}
-			}			
-			_values = lvalues;
-		}
-		else
-		{
-			// compact bitmaps to linearized representation
-			_ptr = new int[numVals+1];
-			_data = new char[totalLen];
-			for( int i=0, off=0; i<numVals; i++ ) {
-				int len = lbitmaps[i].length;
-				_ptr[i] = off;
-				System.arraycopy(lbitmaps[i], 0, _data, off, len);
-				off += len;
-			}
-			_ptr[numVals] = totalLen;
-		}
-	}
-	
-	@Override
-	public long estimateInMemorySize() {
-		long size = super.estimateInMemorySize();
-		
-		// adding the size of values
-		size += 8; //array reference
-		if (_values != null) {
-			size += 32 + _values.length * 8; //values
-		}
-		
-		// adding bitmaps size
-		size += 16; //array references
-		if (_data != null) {
-			size += 32 + _ptr.length * 4; // offsets
-			size += 32 + _data.length * 2;    // bitmaps
-		}
-	
-		return size;
-	}
-
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target, int rl, int ru) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		int[] colIndices = getColIndices();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				if( row<rl ) continue;
-				if( row>ru ) break;
-				
-				for (int colIx = 0; colIx < numCols; colIx++)
-					target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
-			}
-		}
-	}
-
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				for (int colIx = 0; colIx < numCols; colIx++) {
-					int origMatrixColIx = getColIndex(colIx);
-					int targetColIx = colIndexTargets[origMatrixColIx];
-					target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
-				}
-			}
-		}
-	}
-	
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target, int colpos) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				target.quickSetValue(row, 0, _values[valOff+colpos]);
-			}
-		}
-	}
-
-	//generic get for OLE/RLE, to be overwritten for performance
-	//potential: skip scan (segment length agg and run length) instead of decode
-	@Override
-	public double get(int r, int c) {
-		//find local column index
-		int ix = Arrays.binarySearch(_colIndexes, c);
-		if( ix < 0 )
-			throw new RuntimeException("Column index "+c+" not in bitmap group.");
-		
-		//find row index in value offset lists via scan
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				if( row == r )
-					return _values[valOff+ix];
-				else if( row > r )
-					break; //current value
-			}
-		}		
-		return 0;
-	}
-
-	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru)
-		throws DMLRuntimeException;
-
-	protected final double sumValues(int bitmapIx)
-	{
-		final int numCols = getNumCols();
-		final int valOff = bitmapIx * numCols;
-		
-		double val = 0.0;
-		for( int i = 0; i < numCols; i++ ) {
-			val += _values[valOff+i];
-		}
-		
-		return val;
-	}
-	
-	protected final double sumValues(int bitmapIx, double[] b)
-	{
-		final int numCols = getNumCols();
-		final int valOff = bitmapIx * numCols;
-		
-		double val = 0;
-		for( int i = 0; i < numCols; i++ ) {
-			val += _values[valOff+i] * b[i];
-		}
-		
-		return val;
-	}
-
-	protected final double mxxValues(int bitmapIx, Builtin builtin)
-	{
-		final int numCols = getNumCols();
-		final int valOff = bitmapIx * numCols;
-		
-		double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1);
-		for( int i = 0; i < numCols; i++ )
-			val = builtin.execute2(val, _values[valOff+i]);
-		
-		return val;
-	}
-
-	protected final double[] preaggValues(int numVals, double[] b) {
-		double[] ret = new double[numVals];
-		for( int k = 0; k < numVals; k++ )
-			ret[k] = sumValues(k, b);
-		
-		return ret;
-	}
-	
-	/**
-	 * Method for use by subclasses. Applies a scalar operation to the value
-	 * metadata stored in the superclass.
-	 * 
-	 * @param op
-	 *            scalar operation to perform
-	 * @return transformed copy of value metadata for this column group
-	 * @throws DMLRuntimeException if DMLRuntimeException occurs
-	 */
-	protected double[] applyScalarOp(ScalarOperator op)
-			throws DMLRuntimeException 
-	{
-		//scan over linearized values
-		double[] ret = new double[_values.length];
-		for (int i = 0; i < _values.length; i++) {
-			ret[i] = op.executeScalar(_values[i]);
-		}
-
-		return ret;
-	}
-
-	protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
-			throws DMLRuntimeException 
-	{
-		//scan over linearized values
-		double[] ret = new double[_values.length + numCols];
-		for( int i = 0; i < _values.length; i++ ) {
-			ret[i] = op.executeScalar(_values[i]);
-		}
-		
-		//add new value to the end
-		Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
-		
-		return ret;
-	}
-	
-	/**
-	 * NOTE: Shared across OLE/RLE because value-only computation. 
-	 * 
-	 * @param result matrix block
-	 * @param builtin ?
-	 */
-	protected void computeMxx(MatrixBlock result, Builtin builtin) 
-	{
-		//init and 0-value handling
-		double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1);
-		if( _zeros )
-			val = builtin.execute2(val, 0);
-		
-		//iterate over all values only
-		final int numVals = getNumValues();
-		final int numCols = getNumCols();		
-		for (int k = 0; k < numVals; k++)
-			for( int j=0, valOff = k*numCols; j<numCols; j++ )
-				val = builtin.execute2(val, _values[ valOff+j ]);
-		
-		//compute new partial aggregate
-		val = builtin.execute2(val, result.quickGetValue(0, 0));
-		result.quickSetValue(0, 0, val);
-	}
-	
-	/**
-	 * NOTE: Shared across OLE/RLE because value-only computation. 
-	 * 
-	 * @param result matrix block
-	 * @param builtin ?
-	 */
-	protected void computeColMxx(MatrixBlock result, Builtin builtin)
-	{
-		final int numVals = getNumValues();
-		final int numCols = getNumCols();
-		
-		//init and 0-value handling
-		double[] vals = new double[numCols];
-		Arrays.fill(vals, Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1));
-		if( _zeros ) {
-			for( int j = 0; j < numCols; j++ )
-				vals[j] = builtin.execute2(vals[j], 0);		
-		}
-		
-		//iterate over all values only
-		for (int k = 0; k < numVals; k++) 
-			for( int j=0, valOff=k*numCols; j<numCols; j++ )
-				vals[j] = builtin.execute2(vals[j], _values[ valOff+j ]);
-		
-		//copy results to output
-		for( int j=0; j<numCols; j++ )
-			result.quickSetValue(0, _colIndexes[j], vals[j]);
-	}
-	
-
-	/**
-	 * Obtain the number of distinct sets of values associated with the bitmaps in this column group.
-	 * 
-	 * @return the number of distinct sets of values associated with the bitmaps
-	 *         in this column group
-	 */
-	public int getNumValues() {
-		return _values.length / _colIndexes.length;
-	}
-
-	public double[] getValues() {
-		return _values;
-	}
-
-	public char[] getBitmaps() {
-		return _data;
-	}
-	
-	public int[] getBitmapOffsets() {
-		return _ptr;
-	}
-
-	public boolean hasZeros() {
-		return _zeros;
-	}
-	
-	/**
-	 * @param k
-	 *            index of a specific compressed bitmap (stored in subclass,
-	 *            index same as {@link #getValues})
-	 * @return an object for iterating over the row offsets in this bitmap. Only
-	 *         valid until the next call to this method. May be reused across
-	 *         calls.
-	 */
-	public abstract Iterator<Integer> getDecodeIterator(int k);
-
-	//TODO getDecodeIterator(int k, int rl, int ru)
-
-	/**
-	 * Utility function of sparse-unsafe operations.
-	 * 
-	 * @param ind ?
-	 * @return offsets
-	 * @throws DMLRuntimeException if DMLRuntimeException occurs
-	 */
-	protected int[] computeOffsets(boolean[] ind)
-		throws DMLRuntimeException 
-	{
-		//determine number of offsets
-		int numOffsets = 0;
-		for( int i=0; i<ind.length; i++ )
-			numOffsets += ind[i] ? 1 : 0;
-		
-		//create offset lists
-		int[] ret = new int[numOffsets];
-		for( int i=0, pos=0; i<ind.length; i++ )
-			if( ind[i] )
-				ret[pos++] = i;
-		
-		return ret;
-	}
-
-	@Override
-	public void readFields(DataInput in) 
-		throws IOException 
-	{
-		_numRows = in.readInt();
-		int numCols = in.readInt();
-		int numVals = in.readInt();
-		_zeros = in.readBoolean();
-		
-		//read col indices
-		_colIndexes = new int[ numCols ];
-		for( int i=0; i<numCols; i++ )
-			_colIndexes[i] = in.readInt();
-		
-		//read distinct values
-		_values = new double[numVals*numCols];
-		for( int i=0; i<numVals*numCols; i++ )
-			_values[i] = in.readDouble();
-		
-		//read bitmaps
-		int totalLen = in.readInt();
-		_ptr = new int[numVals+1];
-		_data = new char[totalLen];		
-		for( int i=0, off=0; i<numVals; i++ ) {
-			int len = in.readInt();
-			_ptr[i] = off;
-			for( int j=0; j<len; j++ )
-				_data[off+j] = in.readChar();
-			off += len;
-		}
-		_ptr[numVals] = totalLen;
-	}
-	
-	@Override
-	public void write(DataOutput out) 
-		throws IOException 
-	{
-		int numCols = getNumCols();
-		int numVals = getNumValues();
-		out.writeInt(_numRows);
-		out.writeInt(numCols);
-		out.writeInt(numVals);
-		out.writeBoolean(_zeros);
-		
-		//write col indices
-		for( int i=0; i<_colIndexes.length; i++ )
-			out.writeInt( _colIndexes[i] );
-		
-		//write distinct values
-		for( int i=0; i<_values.length; i++ )
-			out.writeDouble(_values[i]);
-
-		//write bitmaps (lens and data, offset later recreated)
-		int totalLen = 0;
-		for( int i=0; i<numVals; i++ )
-			totalLen += len(i);
-		out.writeInt(totalLen);	
-		for( int i=0; i<numVals; i++ ) {
-			int len = len(i);
-			int off = _ptr[i];
-			out.writeInt(len);
-			for( int j=0; j<len; j++ )
-				out.writeChar(_data[off+j]);
-		}
-	}
-
-	@Override
-	public long getExactSizeOnDisk() {
-		long ret = 13; //header
-		//col indices
-		ret += 4 * _colIndexes.length; 
-		//distinct values (groups of values)
-		ret += 8 * _values.length;
-		//actual bitmaps
-		ret += 4; //total length
-		for( int i=0; i<getNumValues(); i++ )
-			ret += 4 + 2 * len(i);
-		
-		return ret;
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
new file mode 100644
index 0000000..1782e2e
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.Builtin;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
+import org.apache.sysml.runtime.functionobjects.ReduceAll;
+import org.apache.sysml.runtime.functionobjects.ReduceCol;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
+import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with
+ * dense dictionary encoding (DDC).
+ * 
+ * NOTE: zero values are included at position 0 in the value dictionary, which
+ * simplifies various operations such as counting the number of non-zeros.
+ */
+public abstract class ColGroupDDC extends ColGroupValue 
+{
+	private static final long serialVersionUID = -3204391646123465004L;
+
+	public ColGroupDDC() {
+		super();
+	}
+	
+	public ColGroupDDC(int[] colIndices, int numRows, UncompressedBitmap ubm) {
+		super(colIndices, numRows, ubm);
+	}
+	
+	protected ColGroupDDC(int[] colIndices, int numRows, double[] values) {
+		super(colIndices, numRows, values);
+	}
+	
+	@Override
+	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
+		for( int i = rl; i < ru; i++ ) {
+			for( int colIx = 0; colIx < _colIndexes.length; colIx++ ) {
+				int col = _colIndexes[colIx];
+				double cellVal = getData(i, colIx);
+				target.quickSetValue(i, col, cellVal);
+			}
+		}
+	}
+
+	@Override
+	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
+		int nrow = getNumRows();
+		int ncol = getNumCols();
+		for( int i = 0; i < nrow; i++ ) {
+			for( int colIx = 0; colIx < ncol; colIx++ ) {
+				int origMatrixColIx = getColIndex(colIx);
+				int col = colIndexTargets[origMatrixColIx];
+				double cellVal = getData(i, colIx);
+				target.quickSetValue(i, col, cellVal);
+			}
+		}
+	}
+
+	@Override
+	public void decompressToBlock(MatrixBlock target, int colpos) {
+		int nrow = getNumRows();
+		for( int i = 0; i < nrow; i++ ) {
+			double cellVal = getData(i, colpos);
+			target.quickSetValue(i, 0, cellVal);
+		}
+	}
+	
+	@Override
+	public double get(int r, int c) {
+		//find local column index
+		int ix = Arrays.binarySearch(_colIndexes, c);
+		if( ix < 0 )
+			throw new RuntimeException("Column index "+c+" not in DDC group.");
+		
+		//get value
+		return getData(r, ix);
+	}
+	
+
+	@Override
+	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
+		int ncol = getNumCols();
+		for( int i = rl; i < ru; i++ ) {
+			int lnnz = 0;
+			for( int colIx=0; colIx < ncol; colIx++ )
+				lnnz += (getData(i, colIx) != 0) ? 1 : 0;
+			rnnz[i-rl] += lnnz;
+		}
+	}
+	
+	@Override
+	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru)
+		throws DMLRuntimeException 
+	{
+		//sum and sumsq (reduceall/reducerow over tuples and counts)
+		if( op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq ) 
+		{
+			KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ?
+					KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject();
+			
+			if( op.indexFn instanceof ReduceAll )
+				computeSum(result, kplus);
+			else if( op.indexFn instanceof ReduceCol )
+				computeRowSums(result, kplus, rl, ru);
+			else if( op.indexFn instanceof ReduceRow )
+				computeColSums(result, kplus);
+		}
+		//min and max (reduceall/reducerow over tuples only)
+		else if(op.aggOp.increOp.fn instanceof Builtin 
+				&& (((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX 
+				|| ((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MIN)) 
+		{		
+			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
+
+			if( op.indexFn instanceof ReduceAll )
+				computeMxx(result, builtin, false);
+			else if( op.indexFn instanceof ReduceCol )
+				computeRowMxx(result, builtin, rl, ru);
+			else if( op.indexFn instanceof ReduceRow )
+				computeColMxx(result, builtin, false);
+		}
+	}
+	
+	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
+		int nrow = getNumRows();
+		int ncol = getNumCols();
+		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+		
+		for( int i=0; i<nrow; i++ )
+			for( int j=0; j<ncol; j++ )
+				kplus.execute2(kbuff, getData(i, j));
+		
+		result.quickSetValue(0, 0, kbuff._sum);
+		result.quickSetValue(0, 1, kbuff._correction);
+	}
+	
+	protected void computeColSums(MatrixBlock result, KahanFunction kplus) {
+		int nrow = getNumRows();
+		int ncol = getNumCols();
+		KahanObject[] kbuff = new KahanObject[getNumCols()];
+		for( int j=0; j<ncol; j++ )
+			kbuff[j] = new KahanObject(result.quickGetValue(0, _colIndexes[j]), 
+					result.quickGetValue(1, _colIndexes[j]));
+		
+		for( int i=0; i<nrow; i++ )
+			for( int j=0; j<ncol; j++ )
+				kplus.execute2(kbuff[j], getData(i, j));
+		
+		for( int j=0; j<ncol; j++ ) {
+			result.quickSetValue(0, _colIndexes[j], kbuff[j]._sum);
+			result.quickSetValue(1, _colIndexes[j], kbuff[j]._correction);
+		}
+	}
+
+	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+		int ncol = getNumCols();
+		KahanObject kbuff = new KahanObject(0, 0);
+		
+		for( int i=rl; i<ru; i++ ) {
+			kbuff.set(result.quickGetValue(i, 0), result.quickGetValue(i, 1));
+			for( int j=0; j<ncol; j++ )
+				kplus.execute2(kbuff, getData(i, j));
+			result.quickSetValue(i, 0, kbuff._sum);
+			result.quickSetValue(i, 1, kbuff._correction);
+		}
+	}
+	
+	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
+		double[] c = result.getDenseBlock();
+		int ncol = getNumCols();
+		
+		for( int i=rl; i<ru; i++ )
+			for( int j=0; j<ncol; j++ )
+				c[i] = builtin.execute2(c[i], getData(i, j));
+	}
+	
+	
+
+	/**
+	 * Generic get value for byte-length-agnostic access.
+	 * 
+	 * @param r global row index
+	 * @param colIx local column index 
+	 * @return value
+	 */
+	protected abstract double getData(int r, int colIx);
+	
+	/**
+	 * Generic set value for byte-length-agnostic write 
+	 * of encoded value.
+	 * 
+	 * @param r global row index
+	 * @param code encoded value 
+	 */
+	protected abstract void setData(int r, int code);
+	
+	@Override
+	public long estimateInMemorySize() {
+		return super.estimateInMemorySize();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
new file mode 100644
index 0000000..4db871f
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.ConverterUtils;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with
+ * dense dictionary encoding (DDC) using 1 byte codes.
+ */
+public class ColGroupDDC1 extends ColGroupDDC 
+{
+	private static final long serialVersionUID = 5204955589230760157L;
+	
+	private byte[] _data;
+
+	public ColGroupDDC1() {
+		super();
+	}
+	
+	public ColGroupDDC1(int[] colIndices, int numRows, UncompressedBitmap ubm) {
+		super(colIndices, numRows, ubm);
+		_data = new byte[numRows];
+		
+		int numVals = ubm.getNumValues();
+		int numCols = ubm.getNumColumns();
+		
+		//materialize zero values, if necessary
+		if( ubm.getNumOffsets() < (long)numRows * numCols ) {
+			int zeroIx = containsAllZeroValue();
+			if( zeroIx < 0 ) {
+				zeroIx = numVals;
+				_values = Arrays.copyOf(_values, _values.length+numCols);
+			}
+			Arrays.fill(_data, (byte)zeroIx);
+		}
+		
+		//iterate over values and write dictionary codes
+		for( int i=0; i<numVals; i++ ) {
+			int[] tmpList = ubm.getOffsetsList(i).extractValues();
+			int tmpListSize = ubm.getNumOffsets(i); 
+			for( int k=0; k<tmpListSize; k++ )
+				_data[tmpList[k]] = (byte)i;
+		}
+	}
+	
+	public ColGroupDDC1(int[] colIndices, int numRows, double[] values, byte[] data) {
+		super(colIndices, numRows, values);
+		_data = data;
+	}
+
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.DDC1;
+	}
+	
+	@Override
+	protected double getData(int r, int colIx) {
+		return _values[(_data[r]&0xFF)*getNumCols()+colIx];
+	}
+	
+	@Override
+	protected void setData(int r, int code) {
+		_data[r] = (byte)code;
+	}
+	
+	@Override
+	public void write(DataOutput out) throws IOException {
+		int numCols = getNumCols();
+		int numVals = getNumValues();
+		out.writeInt(_numRows);
+		out.writeInt(numCols);
+		out.writeInt(numVals);
+		
+		//write col indices
+		for( int i=0; i<_colIndexes.length; i++ )
+			out.writeInt( _colIndexes[i] );
+		
+		//write distinct values
+		for( int i=0; i<_values.length; i++ )
+			out.writeDouble(_values[i]);
+
+		//write data
+		for( int i=0; i<_numRows; i++ )
+			out.writeByte(_data[i]);
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		_numRows = in.readInt();
+		int numCols = in.readInt();
+		int numVals = in.readInt();
+		
+		//read col indices
+		_colIndexes = new int[ numCols ];
+		for( int i=0; i<numCols; i++ )
+			_colIndexes[i] = in.readInt();
+		
+		//read distinct values
+		_values = new double[numVals*numCols];
+		for( int i=0; i<numVals*numCols; i++ )
+			_values[i] = in.readDouble();
+		
+		//read data
+		_data = new byte[_numRows];
+		for( int i=0; i<_numRows; i++ )
+			_data[i] = in.readByte();
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = 12; //header
+		//col indices
+		ret += 4 * _colIndexes.length; 
+		//distinct values (groups of values)
+		ret += 8 * _values.length;
+		//data
+		ret += 1 * _data.length;
+		
+		return ret;
+	}
+
+	@Override
+	public long estimateInMemorySize() {
+		long size = super.estimateInMemorySize();
+		
+		//adding data size
+		if (_data != null)
+			size += _data.length;
+	
+		return size;
+	}
+	
+	@Override
+	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
+		int ncol = getNumCols();
+		for( int i = rl; i < ru; i++ )
+			for( int j=0; j<ncol; j++ )
+				target.appendValue(i, _colIndexes[j], _values[(_data[i]&0xFF)*ncol+j]);
+		//note: append ok because final sort per row 
+	}
+	
+	@Override
+	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		//pre-aggregate nnz per value tuple
+		int[] counts = new int[numVals];
+		for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol )
+			for( int j=0; j<ncol; j++ )
+				counts[k] += (_values[valOff+j]!=0) ? 1 : 0;
+		
+		//scan data and add counts to output rows
+		for( int i = rl; i < ru; i++ )
+			rnnz[i-rl] += counts[_data[i]&0xFF];
+	}
+	
+	@Override
+	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		double[] b = ConverterUtils.getDenseVector(vector);
+		double[] c = result.getDenseBlock();
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		
+		//prepare reduced rhs w/ relevant values
+		double[] sb = new double[numCols];
+		for (int j = 0; j < numCols; j++) {
+			sb[j] = b[_colIndexes[j]];
+		}
+		
+		//pre-aggregate all distinct values (guaranteed <=255)
+		double[] vals = preaggValues(numVals, sb);
+		
+		//iterate over codes and add to output
+		for( int i=rl; i<ru; i++ ) {
+			c[i] += vals[_data[i]&0xFF];
+		}
+	}
+	
+	public static void rightMultByVector(ColGroupDDC1[] grps, MatrixBlock vector, MatrixBlock result, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		double[] b = ConverterUtils.getDenseVector(vector);
+		double[] c = result.getDenseBlock();
+		
+		//prepare distinct values once
+		double[][] vals = new double[grps.length][];
+		for( int i=0; i<grps.length; i++ ) {
+			//prepare reduced rhs w/ relevant values
+			double[] sb = new double[grps[i].getNumCols()];
+			for (int j = 0; j < sb.length; j++) {
+				sb[j] = b[grps[i]._colIndexes[j]];
+			}	
+			//pre-aggregate all distinct values (guaranteed <=255)
+			vals[i] = grps[i].preaggValues(grps[i].getNumValues(), sb);
+		}
+		
+		//cache-conscious matrix-vector multiplication
+		//iterate over codes of all groups and add to output
+		int blksz = 2048; //16KB
+		for( int bi=rl; bi<ru; bi+=blksz )
+			for( int j=0; j<grps.length; j++ )
+				for( int i=bi; i<Math.min(bi+blksz, ru); i++ )
+					c[i] += vals[j][grps[j]._data[i]&0xFF];
+	}
+
+	@Override
+	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) throws DMLRuntimeException {
+		double[] a = ConverterUtils.getDenseVector(vector);
+		double[] c = result.getDenseBlock();
+		final int nrow = getNumRows();
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		if( 8*numVals < getNumRows() ) 
+		{
+			//iterate over codes and pre-aggregate inputs per code (guaranteed <=255)
+			//temporary array also avoids false sharing in multi-threaded environments
+			double[] vals = new double[numVals];
+			for( int i=0; i<nrow; i++ ) {
+				vals[_data[i]&0xFF] += a[i];
+			}
+			
+			//post-scaling of pre-aggregate with distinct values
+			for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
+				double aval = vals[k];
+				for( int j=0; j<ncol; j++ ) {
+					int colIx = _colIndexes[j];
+					c[colIx] += aval * _values[valOff+j];
+				}	
+			}
+		}
+		else //general case
+		{
+			//iterate over codes, compute all, and add to the result
+			for( int i=0; i<nrow; i++ ) {
+				double aval = a[i];
+				if( aval != 0 ) {
+					int valOff = (_data[i]&0xFF) * ncol;
+					for( int j=0; j<ncol; j++ ) {
+						int colIx = _colIndexes[j];
+						c[colIx] += aval * _values[valOff+j];
+					}
+				}
+			}	
+		}
+	}
+	
+	@Override
+	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
+		final int nrow = getNumRows();
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		//iterate over codes and count per code (guaranteed <=255)
+		int[] counts = new int[numVals];
+		for( int i=0; i<nrow; i++ ) {
+			counts[_data[i]&0xFF] ++;
+		}
+		
+		//post-scaling of pre-aggregate with distinct values
+		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+		for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
+			int cntk = counts[k];
+			for( int j=0; j<ncol; j++ )
+				kplus.execute3(kbuff, _values[ valOff+j], cntk);
+		}
+		
+		result.quickSetValue(0, 0, kbuff._sum);
+		result.quickSetValue(0, 1, kbuff._correction);
+	}
+	
+	
+	@Override
+	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+		KahanObject kbuff = new KahanObject(0, 0);
+		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+		double[] c = result.getDenseBlock();
+		
+		//pre-aggregate value tuple sums (per-row contributions)
+		double[] vals = sumAllValues(kplus, kbuff);
+		
+		//scan data and add to result (use kahan plus not general KahanFunction
+		//for correctness in case of sqk+)
+		for( int i=rl; i<ru; i++ ) {
+			kbuff.set(c[2*i], c[2*i+1]);
+			kplus2.execute2(kbuff, vals[_data[i]&0xFF]);
+			c[2*i] = kbuff._sum;
+			c[2*i+1] = kbuff._correction;
+		}
+	}
+	
+	public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		KahanObject kbuff = new KahanObject(0, 0);
+		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+		double[] c = result.getDenseBlock();
+		
+		//prepare distinct values once
+		double[][] vals = new double[grps.length][];
+		for( int i=0; i<grps.length; i++ ) {
+			//pre-aggregate all distinct values (guaranteed <=255)
+			vals[i] = grps[i].sumAllValues(kplus, kbuff);
+		}
+		
+		//cache-conscious row sums operations 
+		//iterate over codes of all groups and add to output
+		//(use kahan plus not general KahanFunction for correctness in case of sqk+)
+		int blksz = 1024; //16KB
+		for( int bi=rl; bi<ru; bi+=blksz )
+			for( int j=0; j<grps.length; j++ )
+				for( int i=bi; i<Math.min(bi+blksz, ru); i++ ) { 
+					kbuff.set(c[2*i], c[2*i+1]);
+					kplus2.execute2(kbuff, vals[j][grps[j]._data[i]&0xFF]);
+					c[2*i] = kbuff._sum;
+					c[2*i+1] = kbuff._correction;
+				}
+	}
+	
+	@Override
+	public ColGroup scalarOperation(ScalarOperator op) throws DMLRuntimeException {
+		//fast path: sparse-safe and -unsafe operations
+		//as zeros are represented, it is sufficient to simply apply the scalar op
+		return new ColGroupDDC1(_colIndexes, _numRows, applyScalarOp(op), _data);
+	}
+}

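To make the ColGroupDDC1 layout concrete: every row stores a single byte code into a dictionary of value tuples, so a matrix-vector multiply can pre-aggregate one dot product per distinct tuple (at most 256 of them) and then perform a single lookup-and-add pass over the codes, as in rightMultByVector above. A minimal self-contained sketch of that idea, with made-up data and names (not the SystemML API):

import java.util.Arrays;

public class Ddc1Sketch {
    public static void main(String[] args) {
        // dictionary: 3 distinct tuples over 2 columns (tuple k starts at k*ncol)
        double[] dict = { 1.0, 2.0,   0.0, 0.0,   5.0, 3.0 };
        int ncol = 2;
        // per-row 1-byte codes (mask with 0xFF when reading, since byte is signed)
        byte[] codes = { 0, 1, 2, 0, 2 };
        double[] b = { 0.5, 2.0 }; // right-hand-side vector restricted to these columns

        // pre-aggregate dot products per distinct tuple (<=256 entries)
        int numVals = dict.length / ncol;
        double[] vals = new double[numVals];
        for (int k = 0; k < numVals; k++)
            for (int j = 0; j < ncol; j++)
                vals[k] += dict[k * ncol + j] * b[j];

        // single pass over the codes: one lookup and add per row
        double[] c = new double[codes.length];
        for (int i = 0; i < codes.length; i++)
            c[i] += vals[codes[i] & 0xFF];

        System.out.println(Arrays.toString(c)); // [4.5, 0.0, 8.5, 4.5, 8.5]
    }
}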
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
new file mode 100644
index 0000000..5f29979
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.ConverterUtils;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with
+ * dense dictionary encoding (DDC) using 2 byte codes.
+ */
+public class ColGroupDDC2 extends ColGroupDDC 
+{
+	private static final long serialVersionUID = -3995768285207071013L;
+	
+	private static final int MAX_TMP_VALS = 32*1024;
+	
+	private char[] _data;
+
+	public ColGroupDDC2() {
+		super();
+	}
+	
+	public ColGroupDDC2(int[] colIndices, int numRows, UncompressedBitmap ubm) {
+		super(colIndices, numRows, ubm);
+		_data = new char[numRows];
+		
+		int numVals = ubm.getNumValues();
+		int numCols = ubm.getNumColumns();
+		
+		//materialize zero values, if necessary
+		if( ubm.getNumOffsets() < (long)numRows * numCols ) {
+			int zeroIx = containsAllZeroValue();
+			if( zeroIx < 0 ) {
+				zeroIx = numVals;
+				_values = Arrays.copyOf(_values, _values.length+numCols);
+			}
+			Arrays.fill(_data, (char)zeroIx);
+		}
+		
+		//iterate over values and write dictionary codes
+		for( int i=0; i<numVals; i++ ) {
+			int[] tmpList = ubm.getOffsetsList(i).extractValues();
+			int tmpListSize = ubm.getNumOffsets(i); 
+			for( int k=0; k<tmpListSize; k++ )
+				_data[tmpList[k]] = (char)i;
+		}
+	}
+	
+	public ColGroupDDC2(int[] colIndices, int numRows, double[] values, char[] data) {
+		super(colIndices, numRows, values);
+		_data = data;
+	}
+
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.DDC2;
+	}
+	
+	@Override
+	protected double getData(int r, int colIx) {
+		return _values[_data[r]*getNumCols()+colIx];
+	}
+
+	@Override
+	protected void setData(int r, int code) {
+		_data[r] = (char)code;
+	}
+	
+	@Override
+	public void write(DataOutput out) throws IOException {
+		int numCols = getNumCols();
+		int numVals = getNumValues();
+		out.writeInt(_numRows);
+		out.writeInt(numCols);
+		out.writeInt(numVals);
+		
+		//write col indices
+		for( int i=0; i<_colIndexes.length; i++ )
+			out.writeInt( _colIndexes[i] );
+		
+		//write distinct values
+		for( int i=0; i<_values.length; i++ )
+			out.writeDouble(_values[i]);
+
+		//write data
+		for( int i=0; i<_numRows; i++ )
+			out.writeChar(_data[i]);
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		_numRows = in.readInt();
+		int numCols = in.readInt();
+		int numVals = in.readInt();
+		
+		//read col indices
+		_colIndexes = new int[ numCols ];
+		for( int i=0; i<numCols; i++ )
+			_colIndexes[i] = in.readInt();
+		
+		//read distinct values
+		_values = new double[numVals*numCols];
+		for( int i=0; i<numVals*numCols; i++ )
+			_values[i] = in.readDouble();
+		
+		//read data
+		_data = new char[_numRows];
+		for( int i=0; i<_numRows; i++ )
+			_data[i] = in.readChar();
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = 12; //header
+		//col indices
+		ret += 4 * _colIndexes.length; 
+		//distinct values (groups of values)
+		ret += 8 * _values.length;
+		//data
+		ret += 2 * _data.length;
+		
+		return ret;
+	}
+	
+	@Override
+	public long estimateInMemorySize() {
+		long size = super.estimateInMemorySize();
+		
+		//adding data size
+		if (_data != null)
+			size += 2 * _data.length;
+	
+		return size;
+	}
+	
+	@Override
+	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
+		int ncol = getNumCols();
+		for( int i = rl; i < ru; i++ )
+			for( int j=0; j<ncol; j++ )
+				target.appendValue(i, _colIndexes[j], _values[_data[i]*ncol+j]);
+		//note: append ok because final sort per row 
+	}
+	
+	@Override
+	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		//pre-aggregate nnz per value tuple
+		int[] counts = new int[numVals];
+		for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol )
+			for( int j=0; j<ncol; j++ )
+				counts[k] += (_values[valOff+j]!=0) ? 1 : 0;
+		
+		//scan data and add counts to output rows
+		for( int i = rl; i < ru; i++ )
+			rnnz[i-rl] += counts[_data[i]];
+	}
+	
+	@Override
+	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) throws DMLRuntimeException {
+		double[] b = ConverterUtils.getDenseVector(vector);
+		double[] c = result.getDenseBlock();
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+
+		//prepare reduced rhs w/ relevant values
+		double[] sb = new double[numCols];
+		for (int j = 0; j < numCols; j++) {
+			sb[j] = b[_colIndexes[j]];
+		}
+		
+		//pre-aggregate all distinct values 
+		double[] vals = preaggValues(numVals, sb);
+
+		//iterate over codes and add to output
+		for( int i=rl; i<ru; i++ )
+			c[i] += vals[_data[i]];
+	}
+
+	@Override
+	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) 
+		throws DMLRuntimeException 
+	{
+		double[] a = ConverterUtils.getDenseVector(vector);
+		double[] c = result.getDenseBlock();
+		final int nrow = getNumRows();
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		if( 8*numVals < getNumRows() )
+		{
+			//iterate over codes and pre-aggregate inputs per code
+			//temporary array also avoids false sharing in multi-threaded environments
+			double[] vals = new double[numVals];
+			for( int i=0; i<nrow; i++ ) {
+				vals[_data[i]] += a[i];
+			}
+			
+			//post-scaling of pre-aggregate with distinct values
+			for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
+				double aval = vals[k];
+				for( int j=0; j<ncol; j++ ) {
+					int colIx = _colIndexes[j];
+					c[colIx] += aval * _values[valOff+j];
+				}	
+			}
+		}
+		else //general case
+		{
+		{
+			for( int i=0; i<nrow; i++ ) {
+				double aval = a[i];
+				if( aval != 0 ) {
+					int valOff = _data[i] * ncol;
+					for( int j=0; j<ncol; j++ ) {
+						int colIx = _colIndexes[j];
+						c[colIx] += aval * _values[valOff+j];
+					}
+				}
+			}
+		}
+	}
+	
+	@Override
+	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
+		final int nrow = getNumRows();
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
+		
+		if( numVals < MAX_TMP_VALS )
+		{
+			//iterate over codes and count per code
+			int[] counts = new int[numVals];
+			for( int i=0; i<nrow; i++ ) {
+				counts[_data[i]] ++;
+			}
+			
+			//post-scaling of pre-aggregate with distinct values
+			KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+			for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
+				int cntk = counts[k];
+				for( int j=0; j<ncol; j++ )
+					kplus.execute3(kbuff, _values[ valOff+j], cntk);
+			}
+			
+			result.quickSetValue(0, 0, kbuff._sum);
+			result.quickSetValue(0, 1, kbuff._correction);
+		}
+		else //general case 
+		{
+			super.computeSum(result, kplus);
+		}
+	}
+	
+	
+	@Override
+	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+		KahanObject kbuff = new KahanObject(0, 0);
+		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+		double[] c = result.getDenseBlock();
+		
+		//pre-aggregate value tuple sums (per-row contributions)
+		double[] vals = sumAllValues(kplus, kbuff);
+		
+		//scan data and add to result (use kahan plus not general KahanFunction
+		//for correctness in case of sqk+)
+		for( int i=rl; i<ru; i++ ) {
+			kbuff.set(c[2*i], c[2*i+1]);
+			kplus2.execute2(kbuff, vals[_data[i]]);
+			c[2*i] = kbuff._sum;
+			c[2*i+1] = kbuff._correction;
+		}
+	}
+	
+	@Override
+	public ColGroup scalarOperation(ScalarOperator op) throws DMLRuntimeException {
+		//fast path: sparse-safe and -unsafe operations
+		//as zeros are represented, it is sufficient to simply apply the scalar op
+		return new ColGroupDDC2(_colIndexes, _numRows, applyScalarOp(op), _data);
+	}
+}

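ColGroupDDC2 differs from ColGroupDDC1 only in the code width: char codes are unsigned 16-bit values, so no 0xFF masking is needed and up to 65,536 distinct tuples can be addressed at 2 bytes per row. A rough, illustrative size comparison in the spirit of getExactSizeOnDisk above (the helper below is hypothetical, not part of the patch):

public class DdcSizeSketch {
    // approximate serialized size: 12B header + 4B per column index
    // + 8B per dictionary value + bytesPerCode per row (mirrors getExactSizeOnDisk)
    static long ddcSize(int numRows, int numCols, int numVals, int bytesPerCode) {
        return 12L + 4L * numCols + 8L * (long) numVals * numCols
             + (long) bytesPerCode * numRows;
    }

    public static void main(String[] args) {
        int rows = 1_000_000, cols = 1, distinct = 200;
        long dense = 8L * rows * cols;                  // uncompressed doubles
        long ddc1  = ddcSize(rows, cols, distinct, 1);  // 1-byte codes (<=255 tuples)
        long ddc2  = ddcSize(rows, cols, distinct, 2);  // 2-byte codes (<=65535 tuples)
        System.out.println("dense=" + dense + "B, ddc1=" + ddc1 + "B, ddc2=" + ddc2 + "B");
    }
}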
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
index 696adf2..f47a432 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -22,20 +22,16 @@ package org.apache.sysml.runtime.compress;
 import java.util.Arrays;
 import java.util.Iterator;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.utils.ConverterUtils;
 import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
 import org.apache.sysml.runtime.functionobjects.Builtin;
-import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
 import org.apache.sysml.runtime.functionobjects.KahanFunction;
 import org.apache.sysml.runtime.functionobjects.KahanPlus;
-import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
-import org.apache.sysml.runtime.functionobjects.ReduceAll;
-import org.apache.sysml.runtime.functionobjects.ReduceCol;
-import org.apache.sysml.runtime.functionobjects.ReduceRow;
 import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
 import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
 
 /**
@@ -43,12 +39,14 @@ import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
  * simple lists of offsets for each set of distinct values.
  * 
  */
-public class ColGroupOLE extends ColGroupBitmap 
+public class ColGroupOLE extends ColGroupOffset 
 {
 	private static final long serialVersionUID = -9157676271360528008L;
 
+	private static final Log LOG = LogFactory.getLog(ColGroupOLE.class.getName());
+	
 	public ColGroupOLE() {
-		super(CompressionType.OLE_BITMAP);
+		super();
 	}
 	
 	/**
@@ -64,14 +62,15 @@ public class ColGroupOLE extends ColGroupBitmap
 	 */
 	public ColGroupOLE(int[] colIndices, int numRows, UncompressedBitmap ubm) 
 	{
-		super(CompressionType.OLE_BITMAP, colIndices, numRows, ubm);
+		super(colIndices, numRows, ubm);
 
 		// compress the bitmaps
 		final int numVals = ubm.getNumValues();
 		char[][] lbitmaps = new char[numVals][];
 		int totalLen = 0;
 		for( int i=0; i<numVals; i++ ) {
-			lbitmaps[i] = BitmapEncoder.genOffsetBitmap(ubm.getOffsetsList(i));
+			lbitmaps[i] = BitmapEncoder.genOffsetBitmap(
+				ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i));
 			totalLen += lbitmaps[i].length;
 		}
 		
@@ -95,13 +94,24 @@ public class ColGroupOLE extends ColGroupBitmap
 				_skiplist[k] = bix;
 			}		
 		}
+		
+		//debug output
+		double ucSize = MatrixBlock.estimateSizeDenseInMemory(numRows, colIndices.length);
+		if( estimateInMemorySize() > ucSize )
+			LOG.warn("OLE group larger than UC dense: "+estimateInMemorySize()+" "+ucSize);
 	}
 
 	public ColGroupOLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps, int[] bitmapOffs) {
-		super(CompressionType.OLE_BITMAP, colIndices, numRows, zeros, values);
+		super(colIndices, numRows, zeros, values);
 		_data = bitmaps;
 		_ptr = bitmapOffs;
 	}
+	
+
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.OLE_BITMAP;
+	}
 
 	@Override
 	public Iterator<Integer> getDecodeIterator(int k) {
@@ -251,7 +261,7 @@ public class ColGroupOLE extends ColGroupBitmap
 		}
 		
 		double[] rvalues = applyScalarOp(op, val0, getNumCols());		
-		char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff);
+		char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff, loff.length);
 		char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length);
 		System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
 		int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1);
@@ -284,7 +294,7 @@ public class ColGroupOLE extends ColGroupBitmap
 			//best configuration aligns with L3 cache size (x*vcores*64K*8B < L3)
 			//x=4 leads to a good yet slightly conservative compromise for single-/
 			//multi-threaded and typical number of cores and L3 cache sizes
-			final int blksz2 = ColGroupBitmap.WRITE_CACHE_BLKSZ;
+			final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
 			
 			//step 1: prepare position and value arrays
 			int[] apos = skipScan(numVals, rl);
@@ -365,7 +375,7 @@ public class ColGroupOLE extends ColGroupBitmap
 		if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
 		{
 			//cache blocking config (see matrix-vector mult for explanation)
-			final int blksz2 = ColGroupBitmap.READ_CACHE_BLKSZ;
+			final int blksz2 = ColGroupOffset.READ_CACHE_BLKSZ;
 			
 			//step 1: prepare position and value arrays
 			
@@ -426,46 +436,7 @@ public class ColGroupOLE extends ColGroupBitmap
 	}
 
 	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) 
-		throws DMLRuntimeException 
-	{
-		unaryAggregateOperations(op, result, 0, getNumRows());
-	}
-	
-	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) 
-		throws DMLRuntimeException 
-	{
-		//sum and sumsq (reduceall/reducerow over tuples and counts)
-		if( op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq ) 
-		{
-			KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ?
-					KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject();
-			
-			if( op.indexFn instanceof ReduceAll )
-				computeSum(result, kplus);
-			else if( op.indexFn instanceof ReduceCol )
-				computeRowSums(result, kplus, rl, ru);
-			else if( op.indexFn instanceof ReduceRow )
-				computeColSums(result, kplus);
-		}
-		//min and max (reduceall/reducerow over tuples only)
-		else if(op.aggOp.increOp.fn instanceof Builtin 
-				&& (((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX 
-				|| ((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MIN)) 
-		{		
-			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
-
-			if( op.indexFn instanceof ReduceAll )
-				computeMxx(result, builtin);
-			else if( op.indexFn instanceof ReduceCol )
-				computeRowMxx(result, builtin, rl, ru);
-			else if( op.indexFn instanceof ReduceRow )
-				computeColMxx(result, builtin);
-		}
-	}
-
-	private void computeSum(MatrixBlock result, KahanFunction kplus)
+	protected final void computeSum(MatrixBlock result, KahanFunction kplus)
 	{
 		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
 		
@@ -493,41 +464,88 @@ public class ColGroupOLE extends ColGroupBitmap
 		result.quickSetValue(0, 1, kbuff._correction);
 	}
 
-	private void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
+	@Override
+	protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
 	{
 		KahanObject kbuff = new KahanObject(0, 0);
-	
+		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+		
 		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
 		final int numVals = getNumValues();
 		double[] c = result.getDenseBlock();
 		
-		//iterate over all values and their bitmaps
-		for (int k = 0; k < numVals; k++) 
+		if( ALLOW_CACHE_CONSCIOUS_ROWSUMS &&
+			LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
 		{
-			//prepare value-to-add for entire value bitmap
-			int boff = _ptr[k];
-			int blen = len(k);
-			double val = sumValues(k);
+			final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ/2;
 			
-			//iterate over bitmap blocks and add values
-			if (val != 0) {
-				int slen;
-				int bix = skipScanVal(k, rl);
-				for( int off=bix*blksz; bix<blen && off<ru; bix+=slen+1, off+=blksz ) {
-					slen = _data[boff+bix];
-					for (int i = 1; i <= slen; i++) {
-						int rix = off + _data[boff+bix + i];
-						kbuff.set(c[2*rix], c[2*rix+1]);
-						kplus.execute2(kbuff, val);
-						c[2*rix] = kbuff._sum;
-						c[2*rix+1] = kbuff._correction;
+			//step 1: prepare position and value arrays
+			int[] apos = skipScan(numVals, rl);
+			double[] aval = sumAllValues(kplus, kbuff);
+					
+			//step 2: cache conscious row sums via horizontal scans 
+			for( int bi=rl; bi<ru; bi+=blksz2 ) 
+			{
+				int bimax = Math.min(bi+blksz2, ru);
+				
+				//horizontal segment scan, incl pos maintenance
+				for (int k = 0; k < numVals; k++) {
+					int boff = _ptr[k];
+					int blen = len(k);
+					double val = aval[k];
+					int bix = apos[k];
+					
+					for( int ii=bi; ii<bimax && bix<blen; ii+=blksz ) {
+						//prepare length, start, and end pos
+						int len = _data[boff+bix];
+						int pos = boff+bix+1;
+						
+						//compute partial results
+						for (int i = 0; i < len; i++) {
+							int rix = ii + _data[pos + i];
+							kbuff.set(c[2*rix], c[2*rix+1]);
+							kplus2.execute2(kbuff, val);
+							c[2*rix] = kbuff._sum;
+							c[2*rix+1] = kbuff._correction;
+						}
+						bix += len + 1;
+					}
+
+					apos[k] = bix;
+				}
+			}		
+		}
+		else
+		{
+			//iterate over all values and their bitmaps
+			for (int k = 0; k < numVals; k++) 
+			{
+				//prepare value-to-add for entire value bitmap
+				int boff = _ptr[k];
+				int blen = len(k);
+				double val = sumValues(k, kplus, kbuff);
+				
+				//iterate over bitmap blocks and add values
+				if (val != 0) {
+					int slen;
+					int bix = skipScanVal(k, rl);
+					for( int off=((rl+1)/blksz)*blksz; bix<blen && off<ru; bix+=slen+1, off+=blksz ) {
+						slen = _data[boff+bix];
+						for (int i = 1; i <= slen; i++) {
+							int rix = off + _data[boff+bix + i];
+							kbuff.set(c[2*rix], c[2*rix+1]);
+							kplus2.execute2(kbuff, val);
+							c[2*rix] = kbuff._sum;
+							c[2*rix+1] = kbuff._correction;
+						}
 					}
 				}
 			}
 		}
 	}
 
-	private void computeColSums(MatrixBlock result, KahanFunction kplus)
+	@Override
+	protected final void computeColSums(MatrixBlock result, KahanFunction kplus)
 	{
 		KahanObject kbuff = new KahanObject(0, 0);
 		
@@ -555,7 +573,8 @@ public class ColGroupOLE extends ColGroupBitmap
 		}
 	}
 
-	private void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
+	@Override
+	protected final void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
 	{
 		//NOTE: zeros handled once for all column groups outside
 		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
@@ -624,7 +643,7 @@ public class ColGroupOLE extends ColGroupBitmap
 	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru)
 	{
 		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
-		final int blksz2 = ColGroupBitmap.WRITE_CACHE_BLKSZ;
+		final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
 		final int numVals = getNumValues();
 		final int numCols = getNumCols();
 		

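For orientation on the OLE layout touched above: each distinct value tuple owns a sequence of segments, where a segment covers BITMAP_BLOCK_SZ rows and stores its length followed by the row offsets within that segment. A minimal standalone decode of such a list, assuming a block size of 8 purely for readability (the real constant is BitmapEncoder.BITMAP_BLOCK_SZ):

import java.util.ArrayList;
import java.util.List;

public class OleDecodeSketch {
    public static void main(String[] args) {
        final int blksz = 8; // illustrative block size
        // two segments: [len=3, offsets 0,2,5] covers rows 0..7, [len=1, offset 4] covers rows 8..15
        char[] data = { 3, 0, 2, 5,   1, 4 };

        // decode to global row indices: the segment start advances by blksz per segment
        List<Integer> rows = new ArrayList<>();
        for (int bix = 0, off = 0; bix < data.length; off += blksz) {
            int slen = data[bix];                 // number of offsets in this segment
            for (int i = 1; i <= slen; i++)
                rows.add(off + data[bix + i]);    // segment start + local offset
            bix += slen + 1;                      // jump to the next segment header
        }
        System.out.println(rows); // [0, 2, 5, 12]
    }
}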
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
new file mode 100644
index 0000000..e49c1a3
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
@@ -0,0 +1,424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
+import org.apache.sysml.runtime.functionobjects.Builtin;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
+import org.apache.sysml.runtime.functionobjects.ReduceAll;
+import org.apache.sysml.runtime.functionobjects.ReduceCol;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
+import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+
+
+/**
+ * Base class for column groups encoded with various types of bitmap encoding.
+ * 
+ * 
+ * NOTES:
+ *  * OLE: separate storage of segment lengths and bitmaps led to a 30% improvement
+ *    but was not applied because it is more difficult to support both data layouts
+ *    at the same time (distributed/local as well as w/ and w/o low-level opt)
+ */
+public abstract class ColGroupOffset extends ColGroupValue 
+{
+	private static final long serialVersionUID = -1635828933479403125L;
+
+	protected static final boolean CREATE_SKIPLIST = true;
+	
+	protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+	public static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+	public static boolean ALLOW_CACHE_CONSCIOUS_ROWSUMS = true;
+	
+	/** Bitmaps, one per uncompressed value in {@link #_values}. */
+	protected int[] _ptr; //bitmap offsets per value
+	protected char[] _data; //linearized bitmaps (variable length)
+	protected boolean _zeros; //contains zero values
+	
+	protected int[] _skiplist;
+	
+	public ColGroupOffset() {
+		super();
+	}
+	
+	/**
+	 * Main constructor. Stores the headers for the individual bitmaps.
+	 * 
+	 * @param colIndices
+	 *            indices (within the block) of the columns included in this
+	 *            column group
+	 * @param numRows
+	 *            total number of rows in the parent block
+	 * @param ubm
+	 *            Uncompressed bitmap representation of the block
+	 */
+	public ColGroupOffset(int[] colIndices, int numRows, UncompressedBitmap ubm) {
+		super(colIndices, numRows, ubm);
+		_zeros = (ubm.getNumOffsets() < numRows);
+	}
+
+	/**
+	 * Constructor for subclass methods that need to create shallow copies
+	 * 
+	 * @param colIndices
+	 *            raw column index information
+	 * @param numRows
+	 *            number of rows in the block
+	 * @param zeros
+	 *            indicates whether the column group contains zero values
+	 * @param values
+	 *            set of distinct values for the block (associated bitmaps are
+	 *            kept in the subclass)
+	 */
+	protected ColGroupOffset(int[] colIndices, int numRows, boolean zeros, double[] values) {
+		super(colIndices, numRows, values);
+		_zeros = zeros;
+	}
+	
+	protected final int len(int k) {
+		return _ptr[k+1] - _ptr[k];
+	}
+
+	protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps) {
+		// compact bitmaps to linearized representation
+		_ptr = new int[numVals+1];
+		_data = new char[totalLen];
+		for( int i=0, off=0; i<numVals; i++ ) {
+			int len = lbitmaps[i].length;
+			_ptr[i] = off;
+			System.arraycopy(lbitmaps[i], 0, _data, off, len);
+			off += len;
+		}
+		_ptr[numVals] = totalLen;
+	}
+	
+	@Override
+	public long estimateInMemorySize() {
+		long size = super.estimateInMemorySize();
+		
+		// adding bitmaps size
+		size += 16; //array references
+		if (_data != null) {
+			size += 32 + _ptr.length * 4; // offsets
+			size += 32 + _data.length * 2;    // bitmaps
+		}
+	
+		return size;
+	}
+
+	//generic decompression for OLE/RLE, to be overridden for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target, int rl, int ru) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		int[] colIndices = getColIndices();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				if( row<rl ) continue;
+				if( row>ru ) break;
+				
+				for (int colIx = 0; colIx < numCols; colIx++)
+					target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
+			}
+		}
+	}
+
+	//generic decompression for OLE/RLE, to be overridden for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				for (int colIx = 0; colIx < numCols; colIx++) {
+					int origMatrixColIx = getColIndex(colIx);
+					int targetColIx = colIndexTargets[origMatrixColIx];
+					target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
+				}
+			}
+		}
+	}
+	
+	//generic decompression for OLE/RLE, to be overridden for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target, int colpos) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				target.quickSetValue(row, 0, _values[valOff+colpos]);
+			}
+		}
+	}
+
+	//generic get for OLE/RLE, to be overridden for performance
+	//potential: skip scan (segment length agg and run length) instead of decode
+	@Override
+	public double get(int r, int c) {
+		//find local column index
+		int ix = Arrays.binarySearch(_colIndexes, c);
+		if( ix < 0 )
+			throw new RuntimeException("Column index "+c+" not in bitmap group.");
+		
+		//find row index in value offset lists via scan
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				if( row == r )
+					return _values[valOff+ix];
+				else if( row > r )
+					break; //current value
+			}
+		}		
+		return 0;
+	}
+
+	protected final void sumAllValues(double[] b, double[] c)
+	{
+		final int numVals = getNumValues();
+		final int numCols = getNumCols();
+		
+		//vectMultiplyAdd over cols instead of dotProduct over vals because
+		//usually more values than columns
+		for( int i=0, off=0; i<numCols; i++, off+=numVals )
+			LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
+	}
+
+	protected final double mxxValues(int bitmapIx, Builtin builtin)
+	{
+		final int numCols = getNumCols();
+		final int valOff = bitmapIx * numCols;
+		
+		double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1);
+		for( int i = 0; i < numCols; i++ )
+			val = builtin.execute2(val, _values[valOff+i]);
+		
+		return val;
+	}
+
+	public char[] getBitmaps() {
+		return _data;
+	}
+	
+	public int[] getBitmapOffsets() {
+		return _ptr;
+	}
+
+	public boolean hasZeros() {
+		return _zeros;
+	}
+	
+	/**
+	 * @param k
+	 *            index of a specific compressed bitmap (stored in subclass,
+	 *            index same as {@link #getValues})
+	 * @return an object for iterating over the row offsets in this bitmap. The
+	 *         returned iterator is only valid until the next call to this method,
+	 *         as the underlying object may be reused across calls.
+	 */
+	public abstract Iterator<Integer> getDecodeIterator(int k);
+
+	//TODO getDecodeIterator(int k, int rl, int ru)
+
+	/**
+	 * Utility function of sparse-unsafe operations.
+	 * 
+	 * @param ind row indicator vector of non zeros
+	 * @return offsets
+	 * @throws DMLRuntimeException if DMLRuntimeException occurs
+	 */
+	protected int[] computeOffsets(boolean[] ind)
+		throws DMLRuntimeException 
+	{
+		//determine number of offsets
+		int numOffsets = 0;
+		for( int i=0; i<ind.length; i++ )
+			numOffsets += ind[i] ? 1 : 0;
+		
+		//create offset lists
+		int[] ret = new int[numOffsets];
+		for( int i=0, pos=0; i<ind.length; i++ )
+			if( ind[i] )
+				ret[pos++] = i;
+		
+		return ret;
+	}
+
+	@Override
+	public void readFields(DataInput in) 
+		throws IOException 
+	{
+		_numRows = in.readInt();
+		int numCols = in.readInt();
+		int numVals = in.readInt();
+		_zeros = in.readBoolean();
+		
+		//read col indices
+		_colIndexes = new int[ numCols ];
+		for( int i=0; i<numCols; i++ )
+			_colIndexes[i] = in.readInt();
+		
+		//read distinct values
+		_values = new double[numVals*numCols];
+		for( int i=0; i<numVals*numCols; i++ )
+			_values[i] = in.readDouble();
+		
+		//read bitmaps
+		int totalLen = in.readInt();
+		_ptr = new int[numVals+1];
+		_data = new char[totalLen];		
+		for( int i=0, off=0; i<numVals; i++ ) {
+			int len = in.readInt();
+			_ptr[i] = off;
+			for( int j=0; j<len; j++ )
+				_data[off+j] = in.readChar();
+			off += len;
+		}
+		_ptr[numVals] = totalLen;
+	}
+	
+	@Override
+	public void write(DataOutput out) 
+		throws IOException 
+	{
+		int numCols = getNumCols();
+		int numVals = getNumValues();
+		out.writeInt(_numRows);
+		out.writeInt(numCols);
+		out.writeInt(numVals);
+		out.writeBoolean(_zeros);
+		
+		//write col indices
+		for( int i=0; i<_colIndexes.length; i++ )
+			out.writeInt( _colIndexes[i] );
+		
+		//write distinct values
+		for( int i=0; i<_values.length; i++ )
+			out.writeDouble(_values[i]);
+
+		//write bitmaps (lens and data, offset later recreated)
+		int totalLen = 0;
+		for( int i=0; i<numVals; i++ )
+			totalLen += len(i);
+		out.writeInt(totalLen);	
+		for( int i=0; i<numVals; i++ ) {
+			int len = len(i);
+			int off = _ptr[i];
+			out.writeInt(len);
+			for( int j=0; j<len; j++ )
+				out.writeChar(_data[off+j]);
+		}
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = 13; //header
+		//col indices
+		ret += 4 * _colIndexes.length; 
+		//distinct values (groups of values)
+		ret += 8 * _values.length;
+		//actual bitmaps
+		ret += 4; //total length
+		for( int i=0; i<getNumValues(); i++ )
+			ret += 4 + 2 * len(i);
+		
+		return ret;
+	}
+	
+	@Override
+	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) 
+		throws DMLRuntimeException 
+	{
+		//sum and sumsq (reduceall/reducerow over tuples and counts)
+		if( op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq ) 
+		{
+			KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ?
+					KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject();
+			
+			if( op.indexFn instanceof ReduceAll )
+				computeSum(result, kplus);
+			else if( op.indexFn instanceof ReduceCol )
+				computeRowSums(result, kplus, rl, ru);
+			else if( op.indexFn instanceof ReduceRow )
+				computeColSums(result, kplus);
+		}
+		//min and max (reduceall/reducerow over tuples only)
+		else if(op.aggOp.increOp.fn instanceof Builtin 
+				&& (((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX 
+				|| ((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MIN)) 
+		{		
+			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
+
+			if( op.indexFn instanceof ReduceAll )
+				computeMxx(result, builtin, _zeros);
+			else if( op.indexFn instanceof ReduceCol )
+				computeRowMxx(result, builtin, rl, ru);
+			else if( op.indexFn instanceof ReduceRow )
+				computeColMxx(result, builtin, _zeros);
+		}
+	}
+	
+	protected abstract void computeSum(MatrixBlock result, KahanFunction kplus);
+	
+	protected abstract void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru);
+	
+	protected abstract void computeColSums(MatrixBlock result, KahanFunction kplus);
+	
+	protected abstract void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru);
+	
+}

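The aggregate kernels in ColGroupOffset and its subclasses carry a (sum, correction) pair through KahanObject and KahanPlus. For readers unfamiliar with that scheme, a minimal plain-Java illustration of compensated summation (a sketch of the idea, not the SystemML function objects):

public class KahanSketch {
    public static void main(String[] args) {
        double[] vals = new double[1_000_001];
        vals[0] = 1e16;
        java.util.Arrays.fill(vals, 1, vals.length, 0.1);

        // naive summation loses every small addend against the large running sum
        double naive = 0;
        for (double v : vals) naive += v;

        // compensated summation: keep a running correction term alongside the sum
        double sum = 0, corr = 0;
        for (double v : vals) {
            double corrected = v + corr;            // re-inject previously lost low-order bits
            double newSum = sum + corrected;
            corr = corrected - (newSum - sum);      // what was lost in this addition
            sum = newSum;
        }
        System.out.println("naive=" + naive + " kahan=" + sum);
    }
}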

[2/5] incubator-systemml git commit: [SYSTEMML-449] Compressed linear algebra v2

Posted by mb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixVectorMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixVectorMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixVectorMultTest.java
index 29b467d..5a19f6e 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixVectorMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixVectorMultTest.java
@@ -49,9 +49,10 @@ public class BasicMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class BasicMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class BasicMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -149,8 +160,10 @@ public class BasicMatrixVectorMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 1, 1, 1.0, 3));
@@ -176,5 +189,8 @@ public class BasicMatrixVectorMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

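The test changes above, and the analogous ones in the remaining test classes of this commit, toggle the static CompressedMatrixBlock.ALLOW_DDC_ENCODING flag to select the encoder and reset it in a finally block so a failing test cannot leak the setting into later tests. A minimal sketch of that toggle-and-restore pattern, with a hypothetical flag and body for illustration:

public class FlagToggleSketch {
    // stand-in for a static configuration flag such as ALLOW_DDC_ENCODING
    static boolean ALLOW_FEATURE = true;

    static void runWithFeature(boolean enable, Runnable body) {
        boolean old = ALLOW_FEATURE;
        try {
            ALLOW_FEATURE = enable; // force the desired code path
            body.run();
        }
        finally {
            ALLOW_FEATURE = old;    // always restore, even if the body throws
        }
    }

    public static void main(String[] args) {
        runWithFeature(false, () -> System.out.println("feature disabled: " + ALLOW_FEATURE));
        System.out.println("restored: " + ALLOW_FEATURE);
    }
}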
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsSparseUnsafeTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsSparseUnsafeTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsSparseUnsafeTest.java
index 55497a6..218739b 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsSparseUnsafeTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsSparseUnsafeTest.java
@@ -47,9 +47,10 @@ public class BasicScalarOperationsSparseUnsafeTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -73,13 +74,23 @@ public class BasicScalarOperationsSparseUnsafeTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -108,13 +119,13 @@ public class BasicScalarOperationsSparseUnsafeTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -147,8 +158,10 @@ public class BasicScalarOperationsSparseUnsafeTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -173,5 +186,8 @@ public class BasicScalarOperationsSparseUnsafeTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsTest.java
index ec708a7..ed6f25c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicScalarOperationsTest.java
@@ -47,9 +47,10 @@ public class BasicScalarOperationsTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -73,13 +74,23 @@ public class BasicScalarOperationsTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -108,13 +119,13 @@ public class BasicScalarOperationsTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runScalarOperationsTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runScalarOperationsTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -147,8 +158,10 @@ public class BasicScalarOperationsTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -173,5 +186,8 @@ public class BasicScalarOperationsTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicTransposeSelfLeftMatrixMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicTransposeSelfLeftMatrixMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicTransposeSelfLeftMatrixMultTest.java
index dfbe453..eb53024 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicTransposeSelfLeftMatrixMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicTransposeSelfLeftMatrixMultTest.java
@@ -45,9 +45,10 @@ public class BasicTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -71,13 +72,23 @@ public class BasicTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -106,13 +117,13 @@ public class BasicTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -145,8 +156,10 @@ public class BasicTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -168,5 +181,8 @@ public class BasicTransposeSelfLeftMatrixMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
index aca54aa..7f87219 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
@@ -46,9 +46,10 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	public enum AggType {
@@ -87,13 +88,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
 	}
 	
 	@Test
@@ -122,13 +133,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
@@ -157,13 +168,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
 	}
 	
 	@Test
@@ -192,13 +213,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
@@ -227,13 +248,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
 	}
 	
 	@Test
@@ -262,13 +293,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
@@ -297,13 +328,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
@@ -332,13 +373,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
@@ -367,13 +408,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
@@ -402,13 +453,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
@@ -437,13 +488,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
 	}
 	
 	@Test
@@ -472,13 +533,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
@@ -507,13 +568,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
 	}
 	
 	@Test
@@ -542,13 +613,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
@@ -577,13 +648,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
 	}
 	
 	@Test
@@ -612,13 +693,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
@@ -647,13 +728,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
 	}
 	
 	@Test
@@ -682,13 +773,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
@@ -717,13 +808,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
 	}
 	
 	@Test
@@ -752,13 +853,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
@@ -787,13 +888,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
 	}
 	
 	@Test
@@ -822,13 +933,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
@@ -857,13 +968,23 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
 	}
 	
 	@Test
@@ -892,13 +1013,13 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
@@ -930,8 +1051,10 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols1, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
 			
@@ -975,5 +1098,8 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicVectorMatrixMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicVectorMatrixMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicVectorMatrixMultTest.java
index c9b7ec4..69a8016 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicVectorMatrixMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicVectorMatrixMultTest.java
@@ -49,9 +49,10 @@ public class BasicVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class BasicVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class BasicVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -149,8 +160,10 @@ public class BasicVectorMatrixMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(1, rows, 1, 1, 1.0, 3));
@@ -176,5 +189,8 @@ public class BasicVectorMatrixMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java
index a74f784..7b10396 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysml.test.integration.AutomatedTestBase;
@@ -140,11 +141,11 @@ public class CompressedLinregCG extends AutomatedTestBase
 			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
 			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
 		}
-		finally
-		{
+		finally {
 			rtplatform = platformOld;
 			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
 			InfrastructureAnalyzer.setLocalMaxMemory(memOld);		
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedSerializationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedSerializationTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedSerializationTest.java
index 9405aa8..b0857a1 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedSerializationTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedSerializationTest.java
@@ -49,9 +49,10 @@ public class CompressedSerializationTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class CompressedSerializationTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runCompressedSerializationTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runCompressedSerializationTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runCompressedSerializationTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runCompressedSerializationTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runCompressedSerializationTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runCompressedSerializationTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class CompressedSerializationTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runCompressedSerializationTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runCompressedSerializationTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runCompressedSerializationTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runCompressedSerializationTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -150,8 +161,10 @@ public class CompressedSerializationTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -181,5 +194,8 @@ public class CompressedSerializationTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeCompressionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeCompressionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeCompressionTest.java
index 4f9101c..d90118c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeCompressionTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeCompressionTest.java
@@ -45,9 +45,10 @@ public class LargeCompressionTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -71,13 +72,23 @@ public class LargeCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -106,13 +117,13 @@ public class LargeCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -145,8 +156,10 @@ public class LargeCompressionTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -165,5 +178,8 @@ public class LargeCompressionTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeMatrixVectorMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeMatrixVectorMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeMatrixVectorMultTest.java
index d2da1a6..f5a5a4b 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeMatrixVectorMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeMatrixVectorMultTest.java
@@ -49,9 +49,10 @@ public class LargeMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class LargeMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class LargeMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -149,8 +160,10 @@ public class LargeMatrixVectorMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 1, 1, 1.0, 3));
@@ -176,5 +189,8 @@ public class LargeMatrixVectorMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParMatrixVectorMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParMatrixVectorMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParMatrixVectorMultTest.java
index 6cdceee..4b6d033 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParMatrixVectorMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParMatrixVectorMultTest.java
@@ -50,9 +50,10 @@ public class LargeParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -76,13 +77,23 @@ public class LargeParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -111,13 +122,13 @@ public class LargeParMatrixVectorMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -150,8 +161,10 @@ public class LargeParMatrixVectorMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 1, 1, 1.0, 3));
@@ -178,5 +191,8 @@ public class LargeParMatrixVectorMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParUnaryAggregateTest.java
index 6cd1f35..6d2585a 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeParUnaryAggregateTest.java
@@ -49,9 +49,10 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	public enum AggType {
@@ -90,13 +91,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, true);
+	public void testRowSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
+	}
+	
+	@Test
+	public void testRowSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMS, true);
 	}
 	
 	@Test
@@ -125,13 +136,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
-	public void testRowSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMS, false);
+	public void testRowSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMS, false);
 	}
 	
 	@Test
@@ -160,13 +171,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, true);
+	}
+	
+	@Test
+	public void testColSumsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, true);
+	public void testColSumsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMS, true);
 	}
 	
 	@Test
@@ -195,13 +216,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
-	public void testColSumsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMS, false);
+	public void testColSumsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMS, false);
 	}
 	
 	@Test
@@ -230,13 +251,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, true);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, true);
+	public void testSumDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
+	}
+	
+	@Test
+	public void testSumSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUM, true);
 	}
 	
 	@Test
@@ -265,13 +296,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
-	public void testSumSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUM, false);
+	public void testSumSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUM, false);
 	}
 	
 	@Test
@@ -300,13 +331,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, true);
+	public void testRowSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
+	}
+	
+	@Test
+	public void testRowSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWSUMSSQ, true);
 	}
 	
 	@Test
@@ -314,10 +355,10 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 		runUnaryAggregateTest(SparsityType.DENSE, ValueType.CONST, AggType.ROWSUMSSQ, true);
 	}
 	
-	//@Test
-	//public void testRowSumsSqSparseConstDataCompression() {
-	//	runUnaryAggregateTest(SparsityType.SPARSE, ValueType.CONST, AggType.ROWSUMSSQ, true);
-	//}
+	@Test
+	public void testRowSumsSqSparseConstDataCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.CONST, AggType.ROWSUMSSQ, true);
+	}
 	
 	@Test
 	public void testRowSumsSqDenseRandDataNoCompression() {
@@ -335,13 +376,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
-	public void testRowSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWSUMSSQ, false);
+	public void testRowSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWSUMSSQ, false);
 	}
 	
 	@Test
@@ -370,13 +411,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, true);
+	public void testColSumsSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
+	}
+	
+	@Test
+	public void testColSumsSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLSUMSSQ, true);
 	}
 	
 	@Test
@@ -405,13 +456,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColSumsSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
-	public void testColSumsSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLSUMSSQ, false);
+	public void testColSumsSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLSUMSSQ, false);
 	}
 	
 	@Test
@@ -440,13 +491,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, true);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, true);
+	public void testSumSqDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
+	}
+	
+	@Test
+	public void testSumSqSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.SUMSQ, true);
 	}
 	
 	@Test
@@ -475,13 +536,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testSumSqDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
-	public void testSumSqSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.SUMSQ, false);
+	public void testSumSqSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.SUMSQ, false);
 	}
 	
 	@Test
@@ -494,7 +555,6 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.CONST, AggType.SUMSQ, false);
 	}
 	
-
 	@Test
 	public void testRowMaxsDenseRandDataCompression() {
 		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND, AggType.ROWMAXS, true);
@@ -511,13 +571,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, true);
+	public void testRowMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
+	}
+	
+	@Test
+	public void testRowMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMAXS, true);
 	}
 	
 	@Test
@@ -546,13 +616,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
-	public void testRowMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMAXS, false);
+	public void testRowMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMAXS, false);
 	}
 	
 	@Test
@@ -581,13 +651,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, true);
+	public void testColMaxsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
+	}
+	
+	@Test
+	public void testColMaxsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMAXS, true);
 	}
 	
 	@Test
@@ -616,13 +696,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMaxsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
-	public void testColMaxsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMAXS, false);
+	public void testColMaxsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMAXS, false);
 	}
 	
 	@Test
@@ -651,13 +731,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, true);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, true);
+	public void testMaxDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
+	}
+	
+	@Test
+	public void testMaxSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MAX, true);
 	}
 	
 	@Test
@@ -686,13 +776,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMaxDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
-	public void testMaxSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MAX, false);
+	public void testMaxSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MAX, false);
 	}
 	
 	@Test
@@ -721,13 +811,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, true);
+	}
+	
+	@Test
+	public void testRowMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, true);
+	public void testRowMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.ROWMINS, true);
 	}
 	
 	@Test
@@ -756,13 +856,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testRowMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
-	public void testRowMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.ROWMINS, false);
+	public void testRowMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.ROWMINS, false);
 	}
 	
 	@Test
@@ -791,13 +891,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, true);
+	public void testColMinsSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
+	}
+	
+	@Test
+	public void testColMinsSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.COLMINS, true);
 	}
 	
 	@Test
@@ -826,13 +936,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testColMinsDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
-	public void testColMinsSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.COLMINS, false);
+	public void testColMinsSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.COLMINS, false);
 	}
 	
 	@Test
@@ -861,13 +971,23 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinDenseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, true);
+	public void testMinSparseRoundRandDataOLECompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinDenseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
+	}
+	
+	@Test
+	public void testMinSparseRoundRandDataDDCCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, AggType.MIN, true);
 	}
 	
 	@Test
@@ -896,13 +1016,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testMinDenseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinDenseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
-	public void testMinSparseRoundRandDataNoCompression() {
-		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND, AggType.MIN, false);
+	public void testMinSparseRoundRandDataOLENoCompression() {
+		runUnaryAggregateTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, AggType.MIN, false);
 	}
 	
 	@Test
@@ -934,8 +1054,10 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
 			
@@ -975,10 +1097,13 @@ public class LargeParUnaryAggregateTest extends AutomatedTestBase
 					|| aggtype == AggType.ROWMAXS || aggtype == AggType.ROWMINS)?rows:1;
 			int dim2 = (aggtype == AggType.COLSUMS || aggtype == AggType.COLSUMSSQ 
 					|| aggtype == AggType.COLMAXS || aggtype == AggType.COLMINS)?cols:1;
-			TestUtils.compareMatrices(d1, d2, dim1, dim2, 0.00000000001);
+			TestUtils.compareMatrices(d1, d2, dim1, dim2, 0.000000001);
 		}
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }
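
These compression tests all follow the same pattern for steering the compression plan: rounded random data keeps the number of distinct values per column small, CompressedMatrixBlock.ALLOW_DDC_ENCODING is set according to the value type under test (false restricts the planner to OLE/RLE groups, true additionally allows DDC groups), and the flag is restored in a finally block so no test leaks its setting into the next one. A minimal standalone sketch of that pattern follows (illustration only, not part of the patch; the TestUtils and DataConverter import paths and the CompressedMatrixBlock(MatrixBlock) constructor plus compress() call are assumed from the surrounding test code rather than shown in this hunk):

import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.test.utils.TestUtils;

public class DdcToggleSketch {
	public static void main(String[] args) throws Exception {
		//rounded random data -> few distinct values per column, so OLE/DDC groups apply
		double[][] input = TestUtils.round(
			TestUtils.generateTestMatrix(1000, 20, -10, 10, 0.9, 7));
		MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
		try {
			//false: force OLE/RLE groups; true: allow DDC groups as well
			CompressedMatrixBlock.ALLOW_DDC_ENCODING = false;
			CompressedMatrixBlock cmb = new CompressedMatrixBlock(mb);
			cmb.compress(); //single-threaded compression
			System.out.println(cmb.getNumRows() + " x " + cmb.getNumColumns());
		}
		finally {
			//always restore the global default so later compressions are unaffected
			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
		}
	}
}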

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeVectorMatrixMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeVectorMatrixMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeVectorMatrixMultTest.java
index 8335ca4..4da0a79 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeVectorMatrixMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/LargeVectorMatrixMultTest.java
@@ -49,9 +49,10 @@ public class LargeVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -75,13 +76,23 @@ public class LargeVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -110,13 +121,13 @@ public class LargeVectorMatrixMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixVectorMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -149,8 +160,10 @@ public class LargeVectorMatrixMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(1, rows, 1, 1, 1.0, 3));
@@ -176,5 +189,8 @@ public class LargeVectorMatrixMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/ParCompressionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParCompressionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParCompressionTest.java
index 603584c..a7b42d7 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParCompressionTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParCompressionTest.java
@@ -47,9 +47,10 @@ public class ParCompressionTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -73,13 +74,23 @@ public class ParCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -108,13 +119,13 @@ public class ParCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -148,8 +159,10 @@ public class ParCompressionTest extends AutomatedTestBase
 			int k = InfrastructureAnalyzer.getLocalParallelism();
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -168,5 +181,8 @@ public class ParCompressionTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }



[3/5] incubator-systemml git commit: [SYSTEMML-449] Compressed linear algebra v2

Posted by mb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerStatic.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
new file mode 100644
index 0000000..e2f00f3
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
+
+/**
+ * Column group partitioning with static distribution heuristic.
+ * 
+ */
+public class ColumnGroupPartitionerStatic extends ColumnGroupPartitioner
+{
+	private static final int MAX_COL_PER_GROUP = 20;
+
+	@Override
+	public List<List<Integer>> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo) 
+	{
+		List<List<Integer>> ret = new ArrayList<List<Integer>>();
+		int numParts = (int)Math.ceil((double)groupCols.size()/MAX_COL_PER_GROUP);
+		int partSize = (int)Math.ceil((double)groupCols.size()/numParts);
+		
+		for( int i=0, pos=0; i<numParts; i++, pos+=partSize ) {
+			List<Integer> tmp = new ArrayList<Integer>();
+			for( int j=0; j<partSize && pos+j<groupCols.size(); j++ )
+				tmp.add(groupCols.get(pos+j));
+			ret.add(tmp);
+		}
+		
+		return ret;
+	}
+}
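
As a quick illustration of the static heuristic above (a hypothetical driver, not part of the patch; it only assumes the classes introduced in this commit compile as shown): 45 groupable columns yield numParts = ceil(45/20) = 3 and partSize = ceil(45/3) = 15, i.e. three balanced bins of 15 columns rather than two full bins of 20 plus a remainder of 5.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.sysml.runtime.compress.cocode.ColumnGroupPartitionerStatic;
import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;

public class StaticPartitionerSketch {
	public static void main(String[] args) {
		List<Integer> cols = new ArrayList<Integer>();
		HashMap<Integer, GroupableColInfo> info = new HashMap<Integer, GroupableColInfo>();
		for( int i=0; i<45; i++ ) {
			cols.add(i);
			info.put(i, new GroupableColInfo(0.1, 1024)); //dummy cardinality ratio and size
		}
		//the static partitioner ignores the per-column info and only balances counts
		List<List<Integer>> bins = new ColumnGroupPartitionerStatic()
			.partitionColumns(cols, info);
		System.out.println(bins.size() + " bins of " + bins.get(0).size() + " columns"); //3 bins of 15 columns
	}
}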

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCoder.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCoder.java
new file mode 100644
index 0000000..778f221
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCoder.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
+
+public class PlanningCoCoder 
+{
+	//internal configurations 
+	private final static PartitionerType COLUMN_PARTITIONER = PartitionerType.BIN_PACKING;
+	
+	private static final Log LOG = LogFactory.getLog(PlanningCoCoder.class.getName());
+	
+	public enum PartitionerType {
+		BIN_PACKING,
+		STATIC,
+	}
+	
+	public static List<int[]> findCocodesByPartitioning(CompressedSizeEstimator sizeEstimator, List<Integer> cols, 
+			CompressedSizeInfo[] colInfos, int numRows, int k) 
+		throws DMLRuntimeException 
+	{
+		// filtering out non-groupable columns as singleton groups
+		// weight is the ratio of its cardinality to the number of rows 
+		int numCols = cols.size();
+		List<Integer> groupCols = new ArrayList<Integer>();
+		HashMap<Integer, GroupableColInfo> groupColsInfo = new HashMap<Integer, GroupableColInfo>();
+		for (int i = 0; i < numCols; i++) {
+			int colIx = cols.get(i);
+			double cardinality = colInfos[colIx].getEstCard();
+			double weight = cardinality / numRows;
+			groupCols.add(colIx);
+			groupColsInfo.put(colIx, new GroupableColInfo(weight,colInfos[colIx].getMinSize()));
+		}
+		
+		// use column group partitioner to create partitions of columns
+		List<List<Integer>> bins = createColumnGroupPartitioner(COLUMN_PARTITIONER)
+				.partitionColumns(groupCols, groupColsInfo);
+
+		// brute force grouping within each partition
+		return (k > 1) ?
+				getCocodingGroupsBruteForce(bins, groupColsInfo, sizeEstimator, numRows, k) :
+				getCocodingGroupsBruteForce(bins, groupColsInfo, sizeEstimator, numRows);
+	}
+
+	private static List<int[]> getCocodingGroupsBruteForce(List<List<Integer>> bins, HashMap<Integer, GroupableColInfo> groupColsInfo, CompressedSizeEstimator estim, int rlen) 
+	{
+		List<int[]> retGroups = new ArrayList<int[]>();		
+		for (List<Integer> bin : bins) {
+			// building an array of singleton CoCodingGroup
+			ArrayList<PlanningCoCodingGroup> sgroups = new ArrayList<PlanningCoCodingGroup>();
+			for (Integer col : bin)
+				sgroups.add(new PlanningCoCodingGroup(col, groupColsInfo.get(col)));
+			// brute force co-coding	
+			PlanningCoCodingGroup[] outputGroups = findCocodesBruteForce(
+					estim, rlen, sgroups.toArray(new PlanningCoCodingGroup[0]));
+			for (PlanningCoCodingGroup grp : outputGroups)
+				retGroups.add(grp.getColIndices());
+		}
+		
+		return retGroups;
+	}
+
+	private static List<int[]> getCocodingGroupsBruteForce(List<List<Integer>> bins, HashMap<Integer, GroupableColInfo> groupColsInfo, CompressedSizeEstimator estim, int rlen, int k) 
+		throws DMLRuntimeException 
+	{
+		List<int[]> retGroups = new ArrayList<int[]>();		
+		try {
+			ExecutorService pool = Executors.newFixedThreadPool( k );
+			ArrayList<CocodeTask> tasks = new ArrayList<CocodeTask>();
+			for (List<Integer> bin : bins) {
+				// building an array of singleton CoCodingGroup
+				ArrayList<PlanningCoCodingGroup> sgroups = new ArrayList<PlanningCoCodingGroup>();
+				for (Integer col : bin)
+					sgroups.add(new PlanningCoCodingGroup(col, groupColsInfo.get(col)));
+				tasks.add(new CocodeTask(estim, sgroups, rlen));
+			}
+			List<Future<PlanningCoCodingGroup[]>> rtask = pool.invokeAll(tasks);	
+			for( Future<PlanningCoCodingGroup[]> lrtask : rtask )
+				for (PlanningCoCodingGroup grp : lrtask.get())
+					retGroups.add(grp.getColIndices());
+			pool.shutdown();
+		}
+		catch(Exception ex) {
+			throw new DMLRuntimeException(ex);
+		}
+		
+		return retGroups;
+	}
+
+	/**
+	 * Identify columns to code together. Uses a greedy approach that merges
+	 * pairs of column groups into larger groups. Each phase of the greedy
+	 * algorithm considers all combinations of pairs to merge.
+	 * 
+	 * @param estim compressed size estimator
+	 * @param numRows number of rows
+	 * @param singletonGroups singleton planning co-coding groups
+	 * @return co-coding column groups after greedy merging
+	 */
+	private static PlanningCoCodingGroup[] findCocodesBruteForce(
+			CompressedSizeEstimator estim, int numRows,
+			PlanningCoCodingGroup[] singletonGroups) 
+	{
+		if( LOG.isTraceEnabled() )
+			LOG.trace("Cocoding: process "+singletonGroups.length);
+		
+		List<PlanningCoCodingGroup> workset = 
+				new ArrayList<PlanningCoCodingGroup>(Arrays.asList(singletonGroups));
+		
+		//establish memo table for extracted column groups
+		PlanningMemoTable memo = new PlanningMemoTable();
+		
+		//process merging iterations until no more change
+		boolean changed = true;
+		while( changed && workset.size()>1 ) {
+			//find best merge, incl memoization
+			PlanningCoCodingGroup tmp = null;
+			for( int i=0; i<workset.size(); i++ ) {
+				for( int j=i+1; j<workset.size(); j++ ) {
+					PlanningCoCodingGroup c1 = workset.get(i);
+					PlanningCoCodingGroup c2 = workset.get(j);
+					memo.incrStats(1, 0, 0);
+					
+					//pruning filter: skip dominated candidates
+					if( -Math.min(c1.getEstSize(), c2.getEstSize()) > memo.getOptChangeInSize() )
+						continue;
+					
+					//memoization or newly created group (incl bitmap extraction)
+					PlanningCoCodingGroup c1c2 = memo.getOrCreate(c1, c2, estim, numRows);
+		
+					//keep best merged group only
+					if( tmp == null || c1c2.getChangeInSize() < tmp.getChangeInSize()
+						|| (c1c2.getChangeInSize() == tmp.getChangeInSize() 
+							&& c1c2.getColIndices().length < tmp.getColIndices().length))
+						tmp = c1c2;
+				}
+			}
+			
+			//modify working set
+			if( tmp != null && tmp.getChangeInSize() < 0 ) {
+				workset.remove(tmp.getLeftGroup());
+				workset.remove(tmp.getRightGroup());
+				workset.add(tmp);
+				memo.remove(tmp);
+				
+				if( LOG.isTraceEnabled() ) {
+					LOG.trace("--merge groups: "+Arrays.toString(tmp.getLeftGroup().getColIndices())+" and "
+							+Arrays.toString(tmp.getRightGroup().getColIndices()));
+				}
+			}
+			else {
+				changed = false;
+			}
+		}
+		
+		if( LOG.isTraceEnabled() )
+			LOG.trace("--stats: "+Arrays.toString(memo.getStats()));
+		
+		return workset.toArray(new PlanningCoCodingGroup[0]);
+	}
+
+	private static ColumnGroupPartitioner createColumnGroupPartitioner(PartitionerType type) {
+		switch( type ) {
+			case BIN_PACKING: 
+				return new ColumnGroupPartitionerBinPacking();
+				
+			case STATIC:
+				return new ColumnGroupPartitionerStatic();
+				
+			default:
+				throw new RuntimeException(
+					"Unsupported column group partitioner: "+type.toString());
+		}
+	}
+	
+	public static class GroupableColInfo {
+		public final double cardRatio;
+		public final long size;
+
+		public GroupableColInfo(double lcardRatio, long lsize) {
+			cardRatio = lcardRatio;
+			size = lsize;
+		}
+	}
+
+	private static class CocodeTask implements Callable<PlanningCoCodingGroup[]> 
+	{
+		private CompressedSizeEstimator _estim = null;
+		private ArrayList<PlanningCoCodingGroup> _sgroups = null;
+		private int _rlen = -1;
+		
+		protected CocodeTask( CompressedSizeEstimator estim, ArrayList<PlanningCoCodingGroup> sgroups, int rlen )  {
+			_estim = estim;
+			_sgroups = sgroups;
+			_rlen = rlen;
+		}
+		
+		@Override
+		public PlanningCoCodingGroup[] call() throws DMLRuntimeException {
+			// brute force co-coding	
+			return findCocodesBruteForce(_estim, _rlen, 
+					_sgroups.toArray(new PlanningCoCodingGroup[0]));
+		}
+	}
+}
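
To make the merge criterion in findCocodesBruteForce concrete (illustrative numbers only): if group A is estimated at 100 KB, group B at 80 KB, and the extracted merged group A+B at 150 KB, then getChangeInSize() = 150 - 100 - 80 = -30 KB, so A and B are replaced by A+B in the working set. The greedy loop terminates once the best remaining candidate has a non-negative change in size, i.e. once no pairwise merge is expected to shrink the compressed representation any further.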

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCodingGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCodingGroup.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCodingGroup.java
new file mode 100644
index 0000000..caaa271
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningCoCodingGroup.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
+
+/** 
+ * Class to represent information about co-coding a group of columns. 
+ * 
+ */
+public class PlanningCoCodingGroup 
+{
+	private int[] _colIndexes;
+	private PlanningCoCodingGroup _leftGrp;
+	private PlanningCoCodingGroup _rightGrp;
+	
+	private long _estSize;
+	private double _cardRatio;
+	
+	
+	/**
+	 * Constructor for a one-column group; i.e. do not co-code a given column.
+	 * 
+	 * @param col column
+	 * @param info groupable column info
+	 */
+	public PlanningCoCodingGroup(int col, GroupableColInfo info) {
+		_colIndexes = new int[]{col};
+		_estSize = info.size;
+		_cardRatio = info.cardRatio;
+	}
+
+	/**
+	 * Constructor for merging two disjoint groups of columns
+	 * 
+	 * @param grp1   first group of columns to merge
+	 * @param grp2   second group to merge
+	 * @param estim compressed size estimator
+	 * @param numRows number of rows
+	 */
+	public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2,
+			CompressedSizeEstimator estim, int numRows) 
+	{
+		_colIndexes = getMergedIndexes(grp1._colIndexes, grp2._colIndexes);
+		
+		// estimating size info
+		CompressedSizeInfo groupSizeInfo = estim
+				.estimateCompressedColGroupSize(_colIndexes);
+		_estSize = groupSizeInfo.getMinSize();
+		_cardRatio = groupSizeInfo.getEstCard() / numRows;
+		
+		_leftGrp = grp1;
+		_rightGrp = grp2;
+	}
+
+	public int[] getColIndices() {
+		return _colIndexes;
+	}
+
+	/**
+	 * Obtain estimated compressed size of the grouped columns.
+	 * 
+	 * @return estimated compressed size of the grouped columns
+	 */
+	public long getEstSize() {
+		return _estSize;
+	}
+	
+	public double getChangeInSize() {
+		if( _leftGrp == null || _rightGrp == null )
+			return 0;
+		
+		return getEstSize() 
+			- _leftGrp.getEstSize() 
+			- _rightGrp.getEstSize();
+	}
+
+	public double getCardinalityRatio() {
+		return _cardRatio;
+	}
+	
+	public PlanningCoCodingGroup getLeftGroup() {
+		return _leftGrp;
+	}
+	
+	public PlanningCoCodingGroup getRightGroup() {
+		return _rightGrp;
+	}
+	
+	@Override 
+	public int hashCode() {
+		return Arrays.hashCode(_colIndexes);
+	}
+	
+	@Override 
+	public boolean equals(Object that) {
+		if( !(that instanceof PlanningCoCodingGroup) )
+			return false;
+		
+		PlanningCoCodingGroup thatgrp = (PlanningCoCodingGroup) that;
+		return Arrays.equals(_colIndexes, thatgrp._colIndexes);
+	}
+
+	@Override
+	public String toString() {
+		return Arrays.toString(_colIndexes);
+	}
+	
+	public static int[] getMergedIndexes(int[] indexes1, int[] indexes2) {
+		// merge sorted non-empty arrays
+		int[] ret = new int[indexes1.length + indexes2.length];		
+		int grp1Ptr = 0, grp2Ptr = 0;
+		for (int mergedIx = 0; mergedIx < ret.length; mergedIx++) {
+			if (indexes1[grp1Ptr] < indexes2[grp2Ptr]) {
+				ret[mergedIx] = indexes1[grp1Ptr++];
+				if (grp1Ptr == indexes1.length) {
+					System.arraycopy(indexes2, grp2Ptr, ret, mergedIx + 1, indexes2.length - grp2Ptr);
+					break;
+				}
+			} 
+			else {
+				ret[mergedIx] = indexes2[grp2Ptr++];
+				if (grp2Ptr == indexes2.length) {
+					System.arraycopy(indexes1, grp1Ptr, ret, mergedIx + 1, indexes1.length - grp1Ptr);
+					break;
+				}
+			}
+		}
+		
+		return ret;
+	}
+	
+	public static class ColIndexes {
+		final int[] _colIndexes;
+		
+		public ColIndexes(int[] colIndexes) {
+			_colIndexes = colIndexes;
+		}
+	
+		@Override 
+		public int hashCode() {
+			return Arrays.hashCode(_colIndexes);
+		}
+		
+		@Override 
+		public boolean equals(Object that) {
+			if( !(that instanceof ColIndexes) )
+				return false;
+			
+			ColIndexes thatgrp = (ColIndexes) that;
+			return Arrays.equals(_colIndexes, thatgrp._colIndexes);
+		}
+	}
+}
\ No newline at end of file
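
The nested ColIndexes wrapper exists because Java arrays use identity-based equals and hashCode and therefore cannot serve directly as hash-map keys in the planning memo table. A standalone two-line check (not part of the patch):

import org.apache.sysml.runtime.compress.cocode.PlanningCoCodingGroup.ColIndexes;

public class ColIndexesSketch {
	public static void main(String[] args) {
		int[] a = new int[]{3, 7};
		int[] b = new int[]{3, 7};
		System.out.println(a.equals(b));                                 //false: identity semantics
		System.out.println(new ColIndexes(a).equals(new ColIndexes(b))); //true: Arrays.equals semantics
	}
}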

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningMemoTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningMemoTable.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningMemoTable.java
new file mode 100644
index 0000000..3f683c2
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/PlanningMemoTable.java
@@ -0,0 +1,75 @@
+package org.apache.sysml.runtime.compress.cocode;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.sysml.runtime.compress.cocode.PlanningCoCodingGroup.ColIndexes;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+
+public class PlanningMemoTable 
+{
+	private HashMap<ColIndexes,PlanningCoCodingGroup> _memo = new HashMap<ColIndexes,PlanningCoCodingGroup>();
+	private double _optChangeInSize = 0; 
+	private int[] _stats = new int[3];
+	
+	public PlanningCoCodingGroup getOrCreate(PlanningCoCodingGroup c1, PlanningCoCodingGroup c2, CompressedSizeEstimator estim, int numRows) 
+	{
+		ColIndexes c1c2Indexes = new ColIndexes(PlanningCoCodingGroup
+				.getMergedIndexes(c1.getColIndices(), c2.getColIndices()));	
+		
+		//probe memo table for existing column group (avoid extraction)
+		PlanningCoCodingGroup c1c2 = _memo.get(c1c2Indexes);
+		
+		//create non-existing group and maintain global stats
+		incrStats(0, 1, 0); //probed plans
+		if( c1c2 == null ) { 
+			c1c2 = new PlanningCoCodingGroup(c1, c2, estim, numRows);
+			_memo.put(c1c2Indexes, c1c2);
+			_optChangeInSize = Math.min(_optChangeInSize, c1c2.getChangeInSize());
+			incrStats(0, 0, 1); //created plans
+		}
+		
+		return c1c2;
+	}
+	
+	public void remove(PlanningCoCodingGroup grp) {
+		//remove atomic groups
+		_memo.remove(new ColIndexes(grp.getColIndices()));
+		_memo.remove(new ColIndexes(grp.getLeftGroup().getColIndices()));
+		_memo.remove(new ColIndexes(grp.getRightGroup().getColIndices()));
+		
+		_optChangeInSize = 0;
+		
+		//remove overlapping groups and recompute min size
+		Iterator<Entry<ColIndexes,PlanningCoCodingGroup>> iter 
+			= _memo.entrySet().iterator();
+		while( iter.hasNext() ) {
+			PlanningCoCodingGroup tmp = iter.next().getValue();
+			if( Arrays.equals(tmp.getLeftGroup().getColIndices(), grp.getLeftGroup().getColIndices())
+				|| Arrays.equals(tmp.getLeftGroup().getColIndices(), grp.getRightGroup().getColIndices())
+				|| Arrays.equals(tmp.getRightGroup().getColIndices(), grp.getLeftGroup().getColIndices())
+				|| Arrays.equals(tmp.getRightGroup().getColIndices(), grp.getRightGroup().getColIndices()))
+			{
+				iter.remove();
+			}
+			else
+				_optChangeInSize = Math.min(_optChangeInSize, tmp.getChangeInSize());
+		}
+	}
+	
+	public void incrStats(int v1, int v2, int v3) {
+		_stats[0] += v1;
+		_stats[1] += v2;
+		_stats[2] += v3;
+	}
+	
+	public double getOptChangeInSize() {
+		return _optChangeInSize;
+	}
+
+	public int[] getStats() {
+		return _stats;
+	}
+}
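
The interplay with the pruning filter in PlanningCoCoder.findCocodesBruteForce is worth spelling out (a reading of the code with illustrative numbers): _optChangeInSize tracks the most negative change in size among all plans created so far. Assuming a merged group is never estimated smaller than its larger constituent, the best a candidate pair (c1, c2) can achieve is a change of -min(size(c1), size(c2)); if the best merge found so far already saves 40 KB and the smaller candidate is only 25 KB, the pair cannot win and the comparatively expensive bitmap extraction in getOrCreate is skipped. After a merge is committed, remove() evicts the merged group, its two constituents, and any memoized plan that overlaps them, and recomputes the bound from the surviving entries.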

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
index 2b49403..4c470e2 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
@@ -20,6 +20,7 @@
 package org.apache.sysml.runtime.compress.estim;
 
 import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.compress.UncompressedBitmap;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
@@ -29,9 +30,16 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 public abstract class CompressedSizeEstimator 
 {
 	protected MatrixBlock _data;
+	protected final int _numRows;
 
 	public CompressedSizeEstimator(MatrixBlock data) {
 		_data = data;
+		_numRows = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+				_data.getNumColumns() : _data.getNumRows();
+	}
+	
+	public int getNumRows() {
+		return _numRows;
 	}
 
 	public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes);
@@ -47,15 +55,19 @@ public abstract class CompressedSizeEstimator
 		
 		//compute size estimation factors
 		for (int i = 0; i < numVals; i++) {
-			int[] list = ubm.getOffsetsList(i);
-			numOffs += list.length;
-			numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1;
-			numSingle += (list.length==1) ? 1 : 0;
+			int[] list = ubm.getOffsetsList(i).extractValues();
+			int listSize = ubm.getNumOffsets(i);
+			numOffs += listSize;
+			numSegs += list[listSize - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1;
+			numSingle += (listSize==1) ? 1 : 0;
 			if( inclRLE ) {
 				int lastOff = -2;
-				for (int j = 0; j < list.length; j++) {
-					if (list[j] != lastOff + 1)
-						numRuns++;
+				for (int j = 0; j < listSize; j++) {
+					if( list[j] != lastOff + 1 ) {
+						numRuns++; //new run
+						numRuns += (list[j]-lastOff) / //empty runs
+								BitmapEncoder.BITMAP_BLOCK_SZ;
+					}
 					lastOff = list[j];
 				}
 			}
@@ -107,6 +119,27 @@ public abstract class CompressedSizeEstimator
 		ret += 2 * numSeqs;
 		return ret;
 	}
+	
+	/**
+	 * Estimates the number of bytes needed to encode this column group 
+	 * in DDC1 or DDC2 format.
+	 * 
+	 * @param numVals number of value tuples
+	 * @param numRows number of rows
+	 * @param numCols number of columns
+	 * @return number of bytes to encode column group in DDC format
+	 */
+	protected static long getDDCSize(int numVals, int numRows, int numCols) {
+		if( numVals > Character.MAX_VALUE-1 )
+			return Long.MAX_VALUE;
+		
+		long ret = 0;
+		//distinct value tuples [double per col]
+		ret += 8L * numVals * numCols;
+		//data [byte or char per row]
+		ret += ((numVals>255) ? 2L : 1L) * numRows;
+		return ret;
+	}
 
 	protected static class SizeEstimationFactors {
  		protected int numVals;   //num value tuples

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
index d24255d..3677c23 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
@@ -46,8 +46,9 @@ public class CompressedSizeEstimatorExact extends CompressedSizeEstimator
 		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
 		
 		//construct new size info summary
-		return new CompressedSizeInfo(fact.numVals,
+		return new CompressedSizeInfo(fact.numVals, fact.numOffs,
 				getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
-				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
+				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()),
+				getDDCSize(fact.numVals, _numRows, ubm.getNumColumns()));
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
index eb0040f..a59893d 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
@@ -21,103 +21,106 @@ package org.apache.sysml.runtime.compress.estim;
 
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.math3.analysis.UnivariateFunction;
+import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils;
 import org.apache.commons.math3.distribution.ChiSquaredDistribution;
 import org.apache.commons.math3.random.RandomDataGenerator;
-import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.BitmapEncoder;
 import org.apache.sysml.runtime.compress.ReaderColumnSelection;
 import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
-import org.apache.sysml.runtime.compress.ReaderColumnSelectionDense;
-import org.apache.sysml.runtime.compress.ReaderColumnSelectionDenseSample;
-import org.apache.sysml.runtime.compress.ReaderColumnSelectionSparse;
 import org.apache.sysml.runtime.compress.UncompressedBitmap;
 import org.apache.sysml.runtime.compress.utils.DblArray;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
 public class CompressedSizeEstimatorSample extends CompressedSizeEstimator 
 {
-	private static final boolean CORRECT_NONZERO_ESTIMATE = false; //TODO enable for production
 	private final static double SHLOSSER_JACKKNIFE_ALPHA = 0.975;
-	public static final float HAAS_AND_STOKES_ALPHA1 = 0.9F; //0.9 recommended in paper
-	public static final float HAAS_AND_STOKES_ALPHA2 = 30F; //30 recommended in paper
-	public static final float HAAS_AND_STOKES_UJ2A_C = 50; //50 recommend in paper
-
-	private int[] _sampleRows = null;
-	private RandomDataGenerator _rng = null;
-	private int _numRows = -1;
-
-	public CompressedSizeEstimatorSample(MatrixBlock data, int[] sampleRows) {
+	public static final double HAAS_AND_STOKES_ALPHA1 = 0.9; //0.9 recommended in paper
+	public static final double HAAS_AND_STOKES_ALPHA2 = 30; //30 recommended in paper
+	public static final int HAAS_AND_STOKES_UJ2A_C = 50; //50 recommended in paper
+	public static final boolean HAAS_AND_STOKES_UJ2A_CUT2 = true; //cut frequency in half
+	public static final boolean HAAS_AND_STOKES_UJ2A_SOLVE = true; //true recommended
+	public static final int MAX_SOLVE_CACHE_SIZE = 64*1024; //global 2MB cache
+	//note: we use a relatively high ALPHA2 and the cut-in-half approach because it
+	//leads to moderate overestimation (compared to systematic underestimation) in
+	//order to follow a conservative approach
+	
+	private static final Log LOG = LogFactory.getLog(CompressedSizeEstimatorSample.class.getName());
+
+	private static ThreadLocal<RandomDataGenerator> _rng = new ThreadLocal<RandomDataGenerator>() {
+		protected RandomDataGenerator initialValue() { return new RandomDataGenerator(); }
+	};
+	
+	private int[] _sampleRows = null;
+	private HashMap<Integer, Double> _solveCache = null;
+	
+	
+	public CompressedSizeEstimatorSample(MatrixBlock data, int sampleSize) 
+		throws DMLRuntimeException 
+	{
 		super(data);
-		_sampleRows = sampleRows;
-		_rng = new RandomDataGenerator();
-		_numRows = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
-				_data.getNumColumns() : _data.getNumRows();
-	}
-
-	public CompressedSizeEstimatorSample(MatrixBlock mb, int sampleSize) {
-		this(mb, null);
+		
+		//get sample of rows, incl eager extraction 
 		_sampleRows = getSortedUniformSample(_numRows, sampleSize);
-	}
-
-	/**
-	 * set the sample rows (assumed to be sorted)
-	 * 
-	 * @param sampleRows sample rows, assumed to be sorted
-	 */
-	public void setSampleRows(int[] sampleRows) {
-		_sampleRows = sampleRows;
+		if( SizeEstimatorFactory.EXTRACT_SAMPLE_ONCE ) {
+			MatrixBlock select = new MatrixBlock(_numRows, 1, false);
+			for( int i=0; i<sampleSize; i++ )
+				select.quickSetValue(_sampleRows[i], 0, 1);
+			_data = _data.removeEmptyOperations(new MatrixBlock(), 
+					!CompressedMatrixBlock.TRANSPOSE_INPUT, select);
+		}
+		
+		//establish estimator-local cache for numeric solve
+		_solveCache = new HashMap<Integer, Double>();
 	}
 
 	@Override
 	public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) 
 	{
+		int sampleSize = _sampleRows.length;
+		int numCols = colIndexes.length;
+		int[] sampleRows = _sampleRows;
+		
 		//extract statistics from sample
-		UncompressedBitmap ubm = BitmapEncoder.extractBitmapFromSample(
-				colIndexes, _data, _sampleRows);
+		UncompressedBitmap ubm = SizeEstimatorFactory.EXTRACT_SAMPLE_ONCE ?
+				BitmapEncoder.extractBitmap(colIndexes, _data) :
+				BitmapEncoder.extractBitmapFromSample(colIndexes, _data, sampleRows);
 		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, false);
-
-		//estimate number of distinct values 
-		int totalCardinality = getNumDistinctValues(colIndexes);
-		totalCardinality = Math.max(totalCardinality, fact.numVals); //fix anomalies w/ large sample fraction
-		totalCardinality = Math.min(totalCardinality, _numRows); //fix anomalies w/ large sample fraction
 		
-		//estimate unseen values
-		// each unseen is assumed to occur only once (it did not show up in the sample because it is rare)
-		int unseen = Math.max(0, totalCardinality - fact.numVals);
-		int sampleSize = _sampleRows.length;
-		
-		//estimate number of offsets
-		double sparsity = OptimizerUtils.getSparsity(
-				_data.getNumRows(), _data.getNumColumns(), _data.getNonZeros());
+		//estimate number of distinct values (incl fixes for anomalies w/ large sample fraction)
+		int totalCardinality = getNumDistinctValues(ubm, _numRows, sampleRows, _solveCache);
+		totalCardinality = Math.max(totalCardinality, fact.numVals);
+		totalCardinality = Math.min(totalCardinality, _numRows); 
 		
-		// expected value given that we don't store the zero values
-		float totalNumOffs = (float) (_numRows * (1 - Math.pow(1 - sparsity,colIndexes.length)));		
-		if( CORRECT_NONZERO_ESTIMATE ) {
-			long numZeros = sampleSize - fact.numOffs;
-			float C = Math.max(1-(float)fact.numSingle/sampleSize, (float)sampleSize/_numRows); 
-			totalNumOffs = _numRows - ((numZeros>0)? (float)_numRows/sampleSize*C*numZeros : 0);
-		}
+		//estimate unseen values
+		int unseenVals = totalCardinality - fact.numVals;
 		
-		// For a single offset, the number of blocks depends on the value of
-		// that offset. small offsets (first group of rows in the matrix)
-		// require a small number of blocks and large offsets (last group of
-		// rows) require a large number of blocks. The unseen offsets are
-		// distributed over the entire offset range. A reasonable and fast
-		// estimate for the number of blocks is to use the arithmetic mean of
-		// the number of blocks used for the first index (=1) and that of the
-		// last index.
-		int numUnseenSeg = Math.round(unseen
-				* (2.0f * BitmapEncoder.BITMAP_BLOCK_SZ + _numRows) / 2
-				/ BitmapEncoder.BITMAP_BLOCK_SZ);
+		//estimate number of non-zeros (conservatively round up)
+		double C = Math.max(1 - (double)fact.numSingle/sampleSize, (double)sampleSize/_numRows); 
+		int numZeros = sampleSize - fact.numOffs; //>=0
+		int numNonZeros = (int)Math.ceil(_numRows - (double)_numRows/sampleSize * C * numZeros);
+		numNonZeros = Math.max(numNonZeros, totalCardinality); //handle anomaly of zi=0
+
+		if( totalCardinality<=0 || unseenVals<0 || numZeros<0 || numNonZeros<=0 )
+			LOG.warn("Invalid estimates detected for "+Arrays.toString(colIndexes)+": "
+					+totalCardinality+" "+unseenVals+" "+numZeros+" "+numNonZeros);
+			
+		// estimate number of segments and number of runs incl correction for
+		// empty segments and empty runs (via expected mean of offset value)
+		int numUnseenSeg = (int) (unseenVals * 
+			Math.ceil((double)_numRows/BitmapEncoder.BITMAP_BLOCK_SZ/2));
 		int totalNumSeg = fact.numSegs + numUnseenSeg;
-		int totalNumRuns = getNumRuns(ubm, sampleSize, _numRows) + unseen;
+		int totalNumRuns = getNumRuns(ubm, sampleSize, _numRows, sampleRows) + numUnseenSeg;
 
 		//construct new size info summary
-		return new CompressedSizeInfo(totalCardinality,
-				getRLESize(totalCardinality, totalNumRuns, colIndexes.length),
-				getOLESize(totalCardinality, totalNumOffs, totalNumSeg, colIndexes.length));
+		return new CompressedSizeInfo(totalCardinality, numNonZeros,
+				getRLESize(totalCardinality, totalNumRuns, numCols),
+				getOLESize(totalCardinality, numNonZeros, totalNumSeg, numCols),
+				getDDCSize(totalCardinality, _numRows, numCols));
 	}
 
 	@Override
@@ -127,47 +130,50 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
 		
 		//construct new size info summary
-		return new CompressedSizeInfo(fact.numVals,
+		return new CompressedSizeInfo(fact.numVals, fact.numOffs,
 				getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
-				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
+				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()),
+				getDDCSize(fact.numVals, _numRows, ubm.getNumColumns()));
 	}
 
-	private int getNumDistinctValues(int[] colIndexes) {
-		return haasAndStokes(colIndexes);
+	private static int getNumDistinctValues(UncompressedBitmap ubm, int numRows, int[] sampleRows, 
+			HashMap<Integer, Double> solveCache) {
+		return haasAndStokes(ubm, numRows, sampleRows.length, solveCache);
 	}
 
-	private int getNumRuns(UncompressedBitmap sampleUncompressedBitmap,
-			int sampleSize, int totalNumRows) {
-		int numVals = sampleUncompressedBitmap.getNumValues();
+	private static int getNumRuns(UncompressedBitmap ubm,
+			int sampleSize, int totalNumRows, int[] sampleRows) {
+		int numVals = ubm.getNumValues();
 		// all values in the sample are zeros
 		if (numVals == 0)
 			return 0;
-		float numRuns = 0;
+		double numRuns = 0;
 		for (int vi = 0; vi < numVals; vi++) {
-			int[] offsets = sampleUncompressedBitmap.getOffsetsList(vi);
-			float offsetsRatio = ((float) offsets.length) / sampleSize;
-			float avgAdditionalOffsets = offsetsRatio * totalNumRows
+			int[] offsets = ubm.getOffsetsList(vi).extractValues();
+			int offsetsSize = ubm.getNumOffsets(vi);
+			double offsetsRatio = ((double) offsetsSize) / sampleSize;
+			double avgAdditionalOffsets = offsetsRatio * totalNumRows
 					/ sampleSize;
 			if (avgAdditionalOffsets < 1) {
 				// Ising-Stevens does not hold
 				// fall-back to using the expected number of offsets as an upper
 				// bound on the number of runs
-				numRuns += ((float) offsets.length) * totalNumRows / sampleSize;
+				numRuns += ((double) offsetsSize) * totalNumRows / sampleSize;
 				continue;
 			}
 			int intervalEnd, intervalSize;
-			float additionalOffsets;
+			double additionalOffsets;
 			// probability of an index being non-offset in current and previous
 			// interval respectively
-			float nonOffsetProb, prevNonOffsetProb = 1;
+			double nonOffsetProb, prevNonOffsetProb = 1;
 			boolean reachedSampleEnd = false;
 			// handling the first interval separately for simplicity
 			int intervalStart = -1;
-			if (_sampleRows[0] == 0) {
+			if (sampleRows[0] == 0) {
 				// empty interval
 				intervalStart = 0;
 			} else {
-				intervalEnd = _sampleRows[0];
+				intervalEnd = sampleRows[0];
 				intervalSize = intervalEnd - intervalStart - 1;
 				// expected value of a multivariate hypergeometric distribution
 				additionalOffsets = offsetsRatio * intervalSize;
@@ -188,7 +194,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 				// intervalStart will always be pointing at the current value
 				// in the separator block
 
-				if (offsetsPtrs < offsets.length
+				if (offsetsPtrs < offsetsSize
 						&& offsets[offsetsPtrs] == intervalStart) {
 					startedWithOffset = true;
 					offsetsPtrs++;
@@ -197,10 +203,10 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 					seenNonOffset = true;
 					endedWithOffset = false;
 				}
-				while (intervalStart + 1 == _sampleRows[ix]) {
-					intervalStart = _sampleRows[ix];
+				while (intervalStart + 1 == sampleRows[ix]) {
+					intervalStart = sampleRows[ix];
 					if (seenNonOffset) {
-						if (offsetsPtrs < offsets.length
+						if (offsetsPtrs < offsetsSize
 								&& offsets[offsetsPtrs] == intervalStart) {
 							withinSepRun = 1;
 							offsetsPtrs++;
@@ -210,7 +216,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 							withinSepRun = 0;
 							endedWithOffset = false;
 						}
-					} else if (offsetsPtrs < offsets.length
+					} else if (offsetsPtrs < offsetsSize
 							&& offsets[offsetsPtrs] == intervalStart) {
 						offsetsPtrs++;
 						endedWithOffset = true;
@@ -230,7 +236,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 				// runs within an interval of unknowns
 				if (reachedSampleEnd)
 					break;
-				intervalEnd = _sampleRows[ix];
+				intervalEnd = sampleRows[ix];
 				intervalSize = intervalEnd - intervalStart - 1;
 				// expected value of a multivariate hypergeometric distribution
 				additionalOffsets = offsetsRatio * intervalSize;
@@ -280,7 +286,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 			}
 			// additional runs resulting from x's on the boundaries of the
 			// separators
-			endedWithOffset = intervalStart == offsets[offsets.length - 1];
+			endedWithOffset = intervalStart == offsets[offsetsSize - 1];
 			if (seenNonOffset) {
 				if (startedWithOffset) {
 					numRuns += prevNonOffsetProb;
@@ -296,31 +302,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 					numRuns += prevNonOffsetProb * nonOffsetProb;
 			}
 		}
-		return Math.round(numRuns);
-	}
-
-	private int haasAndStokes(int[] colIndexes) {
-		ReaderColumnSelection reader =  new ReaderColumnSelectionDenseSample(_data, 
-				colIndexes, _sampleRows, !CompressedMatrixBlock.MATERIALIZE_ZEROS);
-		return haasAndStokes(_numRows, _sampleRows.length, reader);
-	}
-
-	/**
-	 * TODO remove, just for local debugging.
-	 * 
-	 * @param colIndexes column indexes
-	 * @return exact number of district values
-	 */
-	@SuppressWarnings("unused")
-	private int getExactNumDistinctValues(int[] colIndexes) {
-		HashSet<DblArray> distinctVals = new HashSet<DblArray>();
-		ReaderColumnSelection reader = (_data.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT) ? 
-				new ReaderColumnSelectionSparse(_data, colIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS) : 
-				new ReaderColumnSelectionDense(_data, colIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS);
-		DblArray val = null;
-		while (null != (val = reader.nextRow()))
-			distinctVals.add(val);
-		return distinctVals.size();
+		return (int)Math.min(Math.round(numRuns), Integer.MAX_VALUE);
 	}
 
 	/**
@@ -330,10 +312,11 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 	 * @param smplSize sample size
 	 * @return sorted array of integers
 	 */
-	private int[] getSortedUniformSample(int range, int smplSize) {
+	private static int[] getSortedUniformSample(int range, int smplSize) {
 		if (smplSize == 0)
 			return new int[] {};
-		int[] sample = _rng.nextPermutation(range, smplSize);
+		RandomDataGenerator rng = _rng.get();
+		int[] sample = rng.nextPermutation(range, smplSize);
 		Arrays.sort(sample);
 		return sample;
 	}
@@ -380,22 +363,13 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 	 * @param sampleRowsReader reader
 	 * @return estimator
 	 */
-	@SuppressWarnings("unused")
-	private static int shlosserEstimator(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader) 
-	{
-		return shlosserEstimator(nRows, sampleSize, sampleRowsReader,
-				getValCounts(sampleRowsReader));
-	}
-
-	private static int shlosserEstimator(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader,
-			HashMap<DblArray, Integer> valsCount) 
+	private static int shlosserEstimator(UncompressedBitmap ubm, int nRows, int sampleSize) 
 	{
 		double q = ((double) sampleSize) / nRows;
 		double oneMinusQ = 1 - q;
 
-		int[] freqCounts = getFreqCounts(valsCount);
+		int numVals = ubm.getNumValues();
+		int[] freqCounts = getFreqCounts(ubm);
 
 		double numerSum = 0, denomSum = 0;
 		int iPlusOne = 1;
@@ -403,7 +377,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 			numerSum += Math.pow(oneMinusQ, iPlusOne) * freqCounts[i];
 			denomSum += iPlusOne * q * Math.pow(oneMinusQ, i) * freqCounts[i];
 		}
-		int estimate = (int) Math.round(valsCount.size() + freqCounts[0]
+		int estimate = (int) Math.round(numVals + freqCounts[0]
 				* numerSum / denomSum);
 		return estimate < 1 ? 1 : estimate;
 	}
@@ -418,25 +392,16 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 	 * @param sampleRowsReader row reader
 	 * @return estimator
 	 */
-	@SuppressWarnings("unused")
-	private static int smoothedJackknifeEstimator(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader) 
-	{
-		return smoothedJackknifeEstimator(nRows, sampleSize, sampleRowsReader,
-				getValCounts(sampleRowsReader));
-	}
-
-	private static int smoothedJackknifeEstimator(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader,
-			HashMap<DblArray, Integer> valsCount) 
+	private static int smoothedJackknifeEstimator(UncompressedBitmap ubm, int nRows, int sampleSize) 
 	{
-		int[] freqCounts = getFreqCounts(valsCount);
+		int numVals = ubm.getNumValues();
+		int[] freqCounts = getFreqCounts(ubm);
 		// all values in the sample are zeros
 		if (freqCounts.length == 0)
 			return 0;
 		// nRows is N and sampleSize is n
 
-		int d = valsCount.size();
+		int d = numVals;
 		double f1 = freqCounts[0];
 		int Nn = nRows * sampleSize;
 		double D0 = (d - f1 / sampleSize)
@@ -515,43 +480,31 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 	 * @return estimator
 	 */
 	@SuppressWarnings("unused")
-	private static int shlosserJackknifeEstimator(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader) {
-		HashMap<DblArray, Integer> valsCount = getValCounts(sampleRowsReader);
-
+	private static int shlosserJackknifeEstimator(UncompressedBitmap ubm, int nRows, int sampleSize) 
+	{
+		int numVals = ubm.getNumValues();
+		CriticalValue cv = computeCriticalValue(sampleSize);
+		
 		// uniformity chi-square test
-		double nBar = ((double) sampleSize) / valsCount.size();
+		double nBar = ((double) sampleSize) / numVals;
 		// test-statistic
 		double u = 0;
-		for (int cnt : valsCount.values()) {
-			u += Math.pow(cnt - nBar, 2);
+		for( int i=0; i<numVals; i++ ) {
+			u += Math.pow(ubm.getNumOffsets(i) - nBar, 2);
 		}
 		u /= nBar;
-		if (sampleSize != usedSampleSize)
+		if (sampleSize != cv.usedSampleSize)
 			computeCriticalValue(sampleSize);
-		if (u < uniformityCriticalValue) {
-			// uniform
-			return smoothedJackknifeEstimator(nRows, sampleSize,
-					sampleRowsReader, valsCount);
-		} else {
-			return shlosserEstimator(nRows, sampleSize, sampleRowsReader,
-					valsCount);
-		}
+		if (u < cv.uniformityCriticalValue) // uniform
+			return smoothedJackknifeEstimator(ubm, nRows, sampleSize);
+		else 
+			return shlosserEstimator(ubm, nRows, sampleSize);
 	}
 
-	/*
-	 * In the shlosserSmoothedJackknifeEstimator as long as the sample size did
-	 * not change, we will have the same critical value each time the estimator
-	 * is used (given that alpha is the same). We cache the critical value to
-	 * avoid recomputing it in each call.
-	 */
-	private static double uniformityCriticalValue;
-	private static int usedSampleSize;
-	
-	private static void computeCriticalValue(int sampleSize) {
+	private static CriticalValue computeCriticalValue(int sampleSize) {
 		ChiSquaredDistribution chiSqr = new ChiSquaredDistribution(sampleSize - 1);
-		uniformityCriticalValue = chiSqr.inverseCumulativeProbability(SHLOSSER_JACKKNIFE_ALPHA);
-		usedSampleSize = sampleSize;
+		return new CriticalValue(
+			chiSqr.inverseCumulativeProbability(SHLOSSER_JACKKNIFE_ALPHA), sampleSize);
 	}
 
 	/**
@@ -563,115 +516,43 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 	 * 
 	 * @param nRows number of rows
 	 * @param sampleSize sample size
+	 * @param solveCache cache of previously computed method-of-moments solutions
 	 * @param sampleRowsReader row reader
 	 * @return estimator
 	 */
-	private static int haasAndStokes(int nRows, int sampleSize,
-			ReaderColumnSelection sampleRowsReader) 
+	private static int haasAndStokes(UncompressedBitmap ubm, int nRows, int sampleSize, HashMap<Integer, Double> solveCache)
 	{
-		HashMap<DblArray, Integer> valsCount = getValCounts(sampleRowsReader);
+		//obtain value and frequency histograms
+		int numVals = ubm.getNumValues();
+		int[] freqCounts = getFreqCounts(ubm);
+	
 		// all values in the sample are zeros.
-		if (valsCount.size() == 0)
+		if( numVals == 0 )
 			return 1;
-		int[] freqCounts = getFreqCounts(valsCount);
-		float q = ((float) sampleSize) / nRows;
-		float _1MinusQ = 1 - q;
-		// Eq. 11
-		float duj1Fraction = ((float) sampleSize)
-				/ (sampleSize - _1MinusQ * freqCounts[0]);
-		float duj1 = duj1Fraction * valsCount.size();
-		// Eq. 16
-		float gamma = 0;
-		for (int i = 1; i <= freqCounts.length; i++) {
-			gamma += i * (i - 1) * freqCounts[i - 1];
-		}
-		gamma *= duj1 / sampleSize / sampleSize;
-		gamma += duj1 / nRows - 1;
-		gamma = Math.max(gamma, 0);
-		int estimate;
 		
-		if (gamma < HAAS_AND_STOKES_ALPHA1) {
-			// UJ2 - begining of page 1479
-		//	System.out.println("uj2");
-			estimate = (int) (duj1Fraction * (valsCount.size() - freqCounts[0]
-					* _1MinusQ * Math.log(_1MinusQ) * gamma / q));
-		} else if (gamma < HAAS_AND_STOKES_ALPHA2) {
-			// UJ2a - end of page 1998
-			//System.out.println("uj2a");
-			int numRemovedClasses = 0;
-			float updatedNumRows = nRows;
-			int updatedSampleSize = sampleSize;
-
-			for (Integer cnt : valsCount.values()) {
-				if (cnt > HAAS_AND_STOKES_UJ2A_C) {
-					numRemovedClasses++;
-					freqCounts[cnt - 1]--;
-					updatedSampleSize -= cnt;
-					/*
-					 * To avoid solving Eq. 20 numerically for the class size in
-					 * the full population (N_j), the current implementation
-					 * just scales cnt (n_j) by the sampling ratio (q).
-					 * Intuitively, the scaling should be fine since cnt is
-					 * large enough. Also, N_j in Eq. 20 is lower-bounded by cnt
-					 * which is already large enough to make the denominator in
-					 * Eq. 20 very close to 1.
-					 */
-					updatedNumRows -= ((float) cnt) / q;
-				}
-			}
-			if (updatedSampleSize == 0) {
-				// use uJ2a
-				
-				estimate = (int) (duj1Fraction * (valsCount.size() - freqCounts[0]
-						* (_1MinusQ) * Math.log(_1MinusQ) * gamma / q));
-			} else {
-				float updatedQ = ((float) updatedSampleSize) / updatedNumRows;
-				int updatedSampleCardinality = valsCount.size()
-						- numRemovedClasses;
-				float updatedDuj1Fraction = ((float) updatedSampleSize)
-						/ (updatedSampleSize - (1 - updatedQ) * freqCounts[0]);
-				float updatedDuj1 = updatedDuj1Fraction
-						* updatedSampleCardinality;
-				float updatedGamma = 0;
-				for (int i = 1; i <= freqCounts.length; i++) {
-					updatedGamma += i * (i - 1) * freqCounts[i - 1];
-				}
-				updatedGamma *= updatedDuj1 / updatedSampleSize
-						/ updatedSampleSize;
-				updatedGamma += updatedDuj1 / updatedNumRows - 1;
-				updatedGamma = Math.max(updatedGamma, 0);
-
-				estimate = (int) (updatedDuj1Fraction * (updatedSampleCardinality - freqCounts[0]
-						* (1 - updatedQ)
-						* Math.log(1 - updatedQ)
-						* updatedGamma / updatedQ))
-						+ numRemovedClasses;
-			}
-
-		} else {
-			// Sh3 - end of section 3
-			float fraq1Numer = 0;
-			float fraq1Denom = 0;
-			float fraq2Numer = 0;
-			float fraq2Denom = 0;
-			for (int i = 1; i <= freqCounts.length; i++) {
-				fraq1Numer += i * q * q * Math.pow(1 - q * q, i - 1)
-						* freqCounts[i - 1];
-				fraq1Denom += Math.pow(_1MinusQ, i) * (Math.pow(1 + q, i) - 1)
-						* freqCounts[i - 1];
-				fraq2Numer += Math.pow(_1MinusQ, i) * freqCounts[i - 1];
-				fraq2Denom += i * q * Math.pow(_1MinusQ, i - 1)
-						* freqCounts[i - 1];
-			}
-			estimate = (int) (valsCount.size() + freqCounts[0] * fraq1Numer
-					/ fraq1Denom * fraq2Numer * fraq2Numer / fraq2Denom
-					/ fraq2Denom);
-		}
-		return estimate < 1 ? 1 : estimate;
+		double q = ((double) sampleSize) / nRows;
+		double f1 = freqCounts[0];
+		
+		//compute basic Duj1 estimate
+		double duj1 = getDuj1Estimate(q, f1, sampleSize, numVals);
+		
+		//compute gamma based on Duj1
+		double gamma = getGammaSquared(duj1, freqCounts, sampleSize, nRows);
+		double d = -1;
+		
+		//core hybrid estimator based on gamma
+		if (gamma < HAAS_AND_STOKES_ALPHA1)
+			d = getDuj2Estimate(q, f1, sampleSize, numVals, gamma);
+		else if (gamma < HAAS_AND_STOKES_ALPHA2)
+			d = getDuj2aEstimate(q, freqCounts, sampleSize, numVals, gamma, nRows, solveCache);
+		else
+			d = getSh3Estimate(q, freqCounts, numVals);
+		
+		//round and ensure min value 1
+		return Math.max(1, (int)Math.round(d));
 	}
 
-	private static HashMap<DblArray, Integer> getValCounts(
-			ReaderColumnSelection sampleRowsReader) 
+	private static HashMap<DblArray, Integer> getValCounts(ReaderColumnSelection sampleRowsReader) 
 	{
 		HashMap<DblArray, Integer> valsCount = new HashMap<DblArray, Integer>();
 		DblArray val = null;
@@ -681,27 +562,179 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator
 			if (cnt == null)
 				cnt = 0;
 			cnt++;
-			valsCount.put(val, cnt);
+			valsCount.put(new DblArray(val), cnt);
 		}
 		return valsCount;
 	}
 
-	private static int[] getFreqCounts(HashMap<DblArray, Integer> valsCount) 
+	/**
+	 * Creates an inverted histogram, where freqCounts[i-1] indicates 
+	 * how many values occurred with frequency i. Note that freqCounts[0]
+	 * represents the special case of singletons, i.e., values that occur exactly once.
+	 * 
+	 * @param ubm uncompressed bitmap
+	 * @return frequency counts
+	 */
+	private static int[] getFreqCounts(UncompressedBitmap ubm) 
 	{
+		//determine max frequency
+		int numVals = ubm.getNumValues();
 		int maxCount = 0;
-		for (Integer c : valsCount.values()) {
-			if (c > maxCount)
-				maxCount = c;
-		}
-		
-		/*
-		 * freqCounts[i-1] = how many values occured with a frequecy i
-		 */
+		for( int i=0; i<numVals; i++ )
+			maxCount = Math.max(maxCount, ubm.getNumOffsets(i));
+			
+		//create frequency histogram
 		int[] freqCounts = new int[maxCount];
-		for (Integer c : valsCount.values()) {
-			freqCounts[c - 1]++;
-		}
+		for( int i=0; i<numVals; i++ )
+			freqCounts[ubm.getNumOffsets(i)-1] ++;
+
 		return freqCounts;
 
 	}
+
+	/**
+	 * Computes the "unsmoothed first-order jackknife estimator" (Eq 11).
+	 * 
+	 */
+	private static double getDuj1Estimate(double q, double f1, int n, int dn) {
+		return dn / (1 - ((1-q) * f1)/n);
+	}
+	
+	/**
+	 * Computes the "unsmoothed second-order jackknife estimator" (Eq 18b).
+	 * 
+	 */
+	private static double getDuj2Estimate(double q, double f1, int n, int dn, double gammaDuj1) {
+		return (dn - (1-q) * f1 * Math.log(1-q) * gammaDuj1 / q) / (1 - ((1-q) * f1)/n);
+	}
+	
+	/**
+	 * Computes the "unsmoothed second-order jackknife estimator" with an additional
+	 * stabilization procedure, which removes the classes whose frequency exceeds c,
+	 * computes Duj2 over the reduced sample, and finally adds back the removed classes.
+	 * 
+	 */
+	private static double getDuj2aEstimate(double q, int f[], int n, int dn, double gammaDuj1, int N, 
+			HashMap<Integer, Double> solveCache) {
+		int c = HAAS_AND_STOKES_UJ2A_CUT2 ? 
+			f.length/2+1 : HAAS_AND_STOKES_UJ2A_C+1;
+		
+		//compute adjusted sample size after removing classes that
+		//exceed a fixed frequency c
+		int nB = 0, cardB = 0;
+		for( int i=c; i<=f.length; i++ ) 
+			if( f[i-1] != 0 ) {
+				nB += f[i-1] * i; //numVals times frequency 
+				cardB += f[i-1];
+			}
+		
+		//fallback to Duj2 over the full sample if there are only high-frequency classes
+		if( n - nB == 0 )
+			return getDuj2Estimate(q, f[0], n, dn, gammaDuj1);
+
+		//compute reduced population size via numeric solve
+		int updatedN = N; 
+		for( int i=c; i<=f.length; i++ )
+			if( f[i-1] != 0 )
+				updatedN -= f[i-1] * (!HAAS_AND_STOKES_UJ2A_SOLVE ? i/q :
+					getMethodOfMomentsEstimate(i, q, 1, N, solveCache));
+		
+		//remove classes that exceed a fixed frequency c
+		for( int i=c; i<=f.length; i++ )
+			f[i-1] = 0; 
+		
+		//compute duj2a over reduced sample
+		double updatedDuj1 = getDuj1Estimate(q, f[0], n-nB, dn-cardB);
+		double updatedGammaDuj1 = getGammaSquared(updatedDuj1, f, n-nB, updatedN);
+		double duj2 = getDuj2Estimate(q, f[0], n-nB, dn-cardB, updatedGammaDuj1);
+		return duj2 + cardB;		
+	}
+	
+	/**
+	 * Computes the "Shlosser third-order estimator" (Eq 30b).
+	 * 
+	 * Note that this estimator can produce NaN results due to terms such as
+	 * Math.pow(1+q, i), which exceed Double.MAX_VALUE even for moderately
+	 * large i (e.g., around i=14K for q=0.05).
+	 * 
+	 */
+	private static double getSh3Estimate(double q, int[] f, double dn) {
+		double fraq11 = 0, fraq12 = 0, fraq21 = 0, fraq22 = 0;
+		for( int i=1; i<=f.length; i++ ) 
+			if( f[i-1] != 0 ) {
+				fraq11 += i * q*q * Math.pow(1 - q*q, i-1) * f[i-1];
+				//NOTE: numerically unstable due to Math.pow(1+q, i) overflows
+				//fraq12 += Math.pow(1 - q, i) * (Math.pow(1+q, i)-1) * f[i-1];
+				fraq12 += (Math.pow(1 - q*q, i) - Math.pow(1 - q, i)) * f[i-1];
+				fraq21 += Math.pow(1 - q, i) * f[i-1];
+				fraq22 += i * q * Math.pow(1 - q, i-1) * f[i-1];
+			}
+		return dn + f[0] * fraq11/fraq12 * Math.pow(fraq21/fraq22, 2); 
+	}
+	
+	/**
+	 * Computes the "squared coefficient of variation" based on a given 
+	 * initial estimate D (Eq 16).
+	 * 
+	 */
+	private static double getGammaSquared(double D, int[] f, int n, int N) {
+		double gamma = 0;
+		for( int i=1; i<=f.length; i++) 
+			if( f[i-1] != 0 )
+				gamma += i * (i-1) * f[i-1];
+		gamma *= D / n / n;
+		gamma += D / N - 1;
+		return Math.max(0, gamma);
+	}
+	
+	/**
+	 * Solves the method-of-moments estimate numerically. Results are cached
+	 * per observed class frequency nj, which is safe because q is constant
+	 * and min/max are chosen conservatively.
+	 * 
+	 */
+	private static double getMethodOfMomentsEstimate(int nj, double q, double min, double max, 
+		HashMap<Integer, Double> solveCache) {
+		if( solveCache.containsKey(nj) )
+			return solveCache.get(nj);
+		
+		double est = UnivariateSolverUtils
+			.solve(new MethodOfMomentsFunction(nj, q), min, max, 1e-9);
+		
+		if( solveCache.size()<MAX_SOLVE_CACHE_SIZE )
+			solveCache.put(nj, est);
+		
+		return est;
+	}
+	
+	/*
+	 * In the shlosserSmoothedJackknifeEstimator as long as the sample size did
+	 * not change, we will have the same critical value each time the estimator
+	 * is used (given that alpha is the same). We cache the critical value to
+	 * avoid recomputing it in each call.
+	 */
+	private static class CriticalValue {
+		public final double uniformityCriticalValue;
+		public final int usedSampleSize;
+		
+		public CriticalValue(double cv, int size) {
+			uniformityCriticalValue = cv;
+			usedSampleSize = size;
+		} 
+	}
+	
+	private static class MethodOfMomentsFunction implements UnivariateFunction {
+		private final int _nj;
+		private final double _q;
+		
+		public MethodOfMomentsFunction(int nj, double q) {
+			_nj = nj;
+			_q = q;
+		}
+		
+		@Override
+		public double value(double x) {
+			return _q*x / (1-Math.pow(1-_q, x)) - _nj;
+		}
+	}
 }
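
For readers following the new hybrid estimator above: a minimal standalone sketch (plain Java, not the SystemML class itself) of its first two steps, the unsmoothed first-order jackknife Duj1 (Eq. 11) and the squared coefficient of variation gamma (Eq. 16). The frequency histogram f (f[i-1] = number of values occurring exactly i times), sample size n, and row count N are made-up numbers for illustration; gamma then selects between Duj2, Duj2a, and Sh3 exactly as in haasAndStokes above.

    public class HaasStokesSketch {
      public static void main(String[] args) {
        int[] f = {40, 25, 10, 5};            // hypothetical: 40 singletons, 25 doubletons, ...
        int n  = 40*1 + 25*2 + 10*3 + 5*4;    // sample size (140)
        int dn = 40 + 25 + 10 + 5;            // distinct values observed in the sample (80)
        int N  = 10000;                       // assumed number of rows in the full matrix
        double q  = (double) n / N;           // sampling ratio
        double f1 = f[0];                     // number of singletons
        // Eq. 11: unsmoothed first-order jackknife estimate
        double duj1 = dn / (1 - ((1 - q) * f1) / n);
        // Eq. 16: squared coefficient of variation based on duj1
        double gamma = 0;
        for (int i = 1; i <= f.length; i++)
          gamma += i * (i - 1) * f[i - 1];
        gamma = Math.max(0, gamma * duj1 / n / n + duj1 / N - 1);
        System.out.println("duj1=" + duj1 + ", gamma^2=" + gamma);
      }
    }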

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
index 430783d..60acdeb 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
@@ -19,51 +19,53 @@
 
 package org.apache.sysml.runtime.compress.estim;
 
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
+
 /**
  * 
  * A helper reusable object for maintaining bitmap sizes
  */
 public class CompressedSizeInfo 
 {
-	private int _estCard = -1;
-	private long _rleSize = -1; 
-	private long _oleSize = -1;
-
-	public CompressedSizeInfo() {
-		
-	}
+	private final int _estCard;
+	private final int _estNnz;
+	private final long _rleSize; 
+	private final long _oleSize;
+	private final long _ddcSize;
 
-	public CompressedSizeInfo(int estCard, long rleSize, long oleSize) {
+	public CompressedSizeInfo(int estCard, int estNnz, long rleSize, long oleSize, long ddcSize) {
 		_estCard = estCard;
+		_estNnz = estNnz;
 		_rleSize = rleSize;
 		_oleSize = oleSize;
+		_ddcSize = ddcSize;
 	}
 
-	public void setRLESize(long rleSize) {
-		_rleSize = rleSize;
-	}
-	
 	public long getRLESize() {
 		return _rleSize;
 	}
-	
-	public void setOLESize(long oleSize) {
-		_oleSize = oleSize;
-	}
 
 	public long getOLESize() {
 		return _oleSize;
 	}
-
-	public long getMinSize() {
-		return Math.min(_rleSize, _oleSize);
+	
+	public long getDDCSize() {
+		return CompressedMatrixBlock.ALLOW_DDC_ENCODING ? 
+			_ddcSize : Long.MAX_VALUE; 
 	}
 
-	public void setEstCardinality(int estCard) {
-		_estCard = estCard;
+	public long getMinSize() {
+		return Math.min(Math.min(
+			getRLESize(), 
+			getOLESize()),
+			getDDCSize());
 	}
 
-	public int getEstCarinality() {
+	public int getEstCard() {
 		return _estCard;
 	}
+	
+	public int getEstNnz() {
+		return _estNnz;
+	}
 }
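
One consequence of the refactored CompressedSizeInfo worth spelling out: getMinSize() now compares all three candidate sizes, and getDDCSize() reports Long.MAX_VALUE whenever DDC is globally disabled, so DDC simply never wins the comparison and callers need no special-casing. A hedged sketch with made-up byte sizes:

    //hypothetical estimated sizes (in bytes) for one column group
    CompressedSizeInfo info = new CompressedSizeInfo(
        1200 /*estCard*/, 800000 /*estNnz*/,
        64000 /*rleSize*/, 48000 /*oleSize*/, 40000 /*ddcSize*/);
    long best = info.getMinSize();
    //best == 40000 while CompressedMatrixBlock.ALLOW_DDC_ENCODING is true;
    //with DDC disabled, getDDCSize() returns Long.MAX_VALUE and best == 48000 (OLE)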

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java b/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
index c142103..63a092c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
@@ -19,14 +19,16 @@
 
 package org.apache.sysml.runtime.compress.estim;
 
+import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
 public class SizeEstimatorFactory 
 {
-	public static final float SAMPLING_RATIO = 0.01f; //conservative default
+	public static final double SAMPLING_RATIO = 0.05; //conservative default
+	public static final boolean EXTRACT_SAMPLE_ONCE = true;
 
 	@SuppressWarnings("unused")
-	public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, int numRows) {
+	public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, int numRows) throws DMLRuntimeException {
 		return (SAMPLING_RATIO == 1.0) ?
 				new CompressedSizeEstimatorExact(data):
 				new CompressedSizeEstimatorSample(data, (int) (numRows*SAMPLING_RATIO));
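
A short usage fragment for the factory above, assuming an existing MatrixBlock named data: with the new SAMPLING_RATIO of 0.05, a one-million-row block is estimated from a 50,000-row sample, while SAMPLING_RATIO == 1.0 falls back to the exact estimator.

    int numRows = data.getNumRows();          //data: an existing MatrixBlock (assumed)
    int sampleSize = (int) (numRows * SizeEstimatorFactory.SAMPLING_RATIO); //5% of rows
    CompressedSizeEstimator est = SizeEstimatorFactory.getSizeEstimator(data, numRows);
    //est is a CompressedSizeEstimatorSample unless SAMPLING_RATIO == 1.0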

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java b/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
index 37b2984..f4d9f1c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.runtime.compress.utils;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 
 import org.apache.sysml.runtime.compress.ColGroup;
@@ -70,4 +71,19 @@ public class ConverterUtils
 		else 
 			return vector.getDenseBlock();
 	}
+
+	public static MatrixBlock getUncompressedColBlock( ColGroup group )
+	{
+		MatrixBlock ret = null;
+		if( group instanceof ColGroupUncompressed ) {
+			ret = ((ColGroupUncompressed) group).getData();
+		}
+		else {
+			ArrayList<ColGroup> tmpGroup = new ArrayList<ColGroup>(Arrays.asList(group));
+			ColGroupUncompressed decompressedCols = new ColGroupUncompressed(tmpGroup);
+			ret = decompressedCols.getData();
+		}
+		
+		return ret;
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
index ef4d476..0f2f091 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
@@ -66,11 +66,18 @@ public class IntArrayList
 		_size++;
 	}
 
+	/**
+	 * Returns the underlying array of offsets. Note that this array might be
+	 * physically larger than the actual length of the offset list. Use size()
+	 * to obtain the actual length.
+	 * 
+	 * @return the underlying, possibly over-allocated, array of offsets
+	 */
 	public int[] extractValues() {
 		if( _size == 1 )
 			return new int[] { _val0 };
 		else
-			return Arrays.copyOfRange(_data, 0, _size);
+			return _data;
 	}
 
 	private void resize() {
@@ -80,8 +87,6 @@ public class IntArrayList
 					"IntArrayList resize leads to integer overflow: size=" + _size);
 
 		// resize data array and copy existing contents
-		int[] newdata = new int[_data.length * RESIZE_FACTOR];
-		System.arraycopy(_data, 0, newdata, 0, _size);
-		_data = newdata;
+		_data = Arrays.copyOf(_data, _data.length * RESIZE_FACTOR);
 	}
 }
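
Because extractValues() now hands back the internal buffer instead of a trimmed copy, callers must bound their loops by size() rather than by the array length. A small hedged sketch of the intended usage, where list is an IntArrayList populated elsewhere and consume(...) is a placeholder for caller logic:

    int[] offsets = list.extractValues(); //may be physically larger than the logical size
    int len = list.size();                //logical number of offsets
    for (int i = 0; i < len; i++)
        consume(offsets[i]);              //placeholder; never read beyond len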

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
index 3bf0ad4..7a4a013 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
@@ -28,6 +28,86 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
  */
 public class LinearAlgebraUtils {
 
+	public static double dotProduct(double[] a, double[] b, final int len) 
+	{
+		double val = 0;
+		final int bn = len % 8;
+
+		// compute rest
+		for (int i = 0; i < bn; i++)
+			val += a[i] * b[i];
+
+		// unrolled 8-block (for better instruction-level parallelism)
+		for (int i = bn; i < len; i += 8) {
+			// read 64B cachelines of a and b
+			// compute cval' = sum(a * b) + cval
+			val += a[i + 0] * b[i + 0] 
+				 + a[i + 1] * b[i + 1] 
+				 + a[i + 2] * b[i + 2] 
+				 + a[i + 3] * b[i + 3] 
+				 + a[i + 4] * b[i + 4]
+				 + a[i + 5] * b[i + 5] 
+				 + a[i + 6] * b[i + 6] 
+				 + a[i + 7] * b[i + 7];
+		}
+
+		// scalar result
+		return val;
+	}
+
+	public static double dotProduct( double[] a, double[] b, int ai, int bi, final int len )
+	{
+		double val = 0;
+		final int bn = len%8;
+				
+		//compute rest
+		for( int i = 0; i < bn; i++, ai++, bi++ )
+			val += a[ ai ] * b[ bi ];
+		
+		//unrolled 8-block (for better instruction-level parallelism)
+		for( int i = bn; i < len; i+=8, ai+=8, bi+=8 )
+		{
+			//read 64B cachelines of a and b
+			//compute cval' = sum(a * b) + cval
+			val += a[ ai+0 ] * b[ bi+0 ]
+			     + a[ ai+1 ] * b[ bi+1 ]
+			     + a[ ai+2 ] * b[ bi+2 ]
+			     + a[ ai+3 ] * b[ bi+3 ]
+			     + a[ ai+4 ] * b[ bi+4 ]
+			     + a[ ai+5 ] * b[ bi+5 ]
+			     + a[ ai+6 ] * b[ bi+6 ]
+			     + a[ ai+7 ] * b[ bi+7 ];
+		}
+		
+		//scalar result
+		return val; 
+	}
+
+	public static void vectAdd( double[] a, double[] c, int ai, int ci, final int len )
+	{
+		final int bn = len%8;
+		
+		//rest, not aligned to 8-blocks
+		for( int j = 0; j < bn; j++, ai++, ci++)
+			c[ ci ] += a[ ai ];
+		
+		//unrolled 8-block  (for better instruction-level parallelism)
+		for( int j = bn; j < len; j+=8, ai+=8, ci+=8) 
+		{
+			//read 64B cachelines of a and c
+			//compute c' = c + a
+			//write back 64B cacheline of c = c'
+			c[ ci+0 ] += a[ ai+0 ];
+			c[ ci+1 ] += a[ ai+1 ];
+			c[ ci+2 ] += a[ ai+2 ];
+			c[ ci+3 ] += a[ ai+3 ];
+			c[ ci+4 ] += a[ ai+4 ];
+			c[ ci+5 ] += a[ ai+5 ];
+			c[ ci+6 ] += a[ ai+6 ];
+			c[ ci+7 ] += a[ ai+7 ];
+		}
+	}
+
 	public static void vectAdd( final double aval, double[] c, char[] bix, final int bi, final int ci, final int len )
 	{
 		final int bn = len%8;
@@ -72,6 +152,53 @@ public class LinearAlgebraUtils {
 		}
 	}
 
+	public static void vectMultiplyAdd( final double aval, double[] b, double[] c, int[] bix, final int bi, final int ci, final int len )
+	{
+		final int bn = (len-bi)%8;
+		
+		//rest, not aligned to 8-blocks
+		for( int j = bi; j < bi+bn; j++ )
+			c[ ci + bix[j] ] += aval * b[ j ];
+		
+		//unrolled 8-block (for better instruction-level parallelism)
+		for( int j = bi+bn; j < len; j+=8 )
+		{
+			c[ ci+bix[j+0] ] += aval * b[ j+0 ];
+			c[ ci+bix[j+1] ] += aval * b[ j+1 ];
+			c[ ci+bix[j+2] ] += aval * b[ j+2 ];
+			c[ ci+bix[j+3] ] += aval * b[ j+3 ];
+			c[ ci+bix[j+4] ] += aval * b[ j+4 ];
+			c[ ci+bix[j+5] ] += aval * b[ j+5 ];
+			c[ ci+bix[j+6] ] += aval * b[ j+6 ];
+			c[ ci+bix[j+7] ] += aval * b[ j+7 ];
+		}
+	}
+
+	public static void vectMultiplyAdd( final double aval, double[] b, double[] c, int bi, int ci, final int len )
+	{
+		final int bn = len%8;
+		
+		//rest, not aligned to 8-blocks
+		for( int j = 0; j < bn; j++, bi++, ci++)
+			c[ ci ] += aval * b[ bi ];
+		
+		//unrolled 8-block  (for better instruction-level parallelism)
+		for( int j = bn; j < len; j+=8, bi+=8, ci+=8) 
+		{
+			//read 64B cachelines of b and c
+			//compute c' = aval * b + c
+			//write back 64B cacheline of c = c'
+			c[ ci+0 ] += aval * b[ bi+0 ];
+			c[ ci+1 ] += aval * b[ bi+1 ];
+			c[ ci+2 ] += aval * b[ bi+2 ];
+			c[ ci+3 ] += aval * b[ bi+3 ];
+			c[ ci+4 ] += aval * b[ bi+4 ];
+			c[ ci+5 ] += aval * b[ bi+5 ];
+			c[ ci+6 ] += aval * b[ bi+6 ];
+			c[ ci+7 ] += aval * b[ bi+7 ];
+		}
+	}
+
 	public static double vectSum( double[] a, char[] bix, final int ai, final int bi, final int len )
 	{
 		double val = 0;
@@ -122,6 +249,18 @@ public class LinearAlgebraUtils {
 		return val;
 	}
 
+	public static void copyUpperToLowerTriangle( MatrixBlock ret )
+	{
+		double[] c = ret.getDenseBlock();
+		final int m = ret.getNumRows();
+		final int n = ret.getNumColumns();
+		
+		//copy symmetric values
+		for( int i=0, uix=0; i<m; i++, uix+=n )
+			for( int j=i+1, lix=j*n+i; j<n; j++, lix+=n )
+				c[ lix ] = c[ uix+j ];
+	}
+
 	public static void copyNonZerosToRowCol( MatrixBlock ret, MatrixBlock tmp, int ix )
 	{
 		for(int i=0; i<tmp.getNumColumns(); i++) {
@@ -132,4 +271,29 @@ public class LinearAlgebraUtils {
 			}
 		}
 	}
+	
+	/**
+	 * Obtain the index of the closest element in a to the value x.
+	 * 
+	 * @param a array of ints
+	 * @param x value
+	 * @return the index of the closest element in a to the value x
+	 */
+	public static int getClosestK(int[] a, int x) {
+
+		int low = 0;
+		int high = a.length - 1;
+
+		while (low < high) {
+			int mid = (low + high) / 2;
+			int d1 = Math.abs(a[mid] - x);
+			int d2 = Math.abs(a[mid + 1] - x);
+			if (d2 <= d1) {
+				low = mid + 1;
+			} else {
+				high = mid;
+			}
+		}
+		return high;
+	}
 }
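
All of the kernels added above share the same rest-plus-unrolled-8-block structure. The following standalone class (illustration only, not the SystemML utility) isolates that pattern so the len % 8 handling is easy to verify:

    public class UnrollDemo {
      static double dot(double[] a, double[] b, int len) {
        double val = 0;
        final int bn = len % 8;
        for (int i = 0; i < bn; i++)      //rest, not aligned to 8-blocks
          val += a[i] * b[i];
        for (int i = bn; i < len; i += 8) //unrolled 8-block
          val += a[i]*b[i] + a[i+1]*b[i+1] + a[i+2]*b[i+2] + a[i+3]*b[i+3]
               + a[i+4]*b[i+4] + a[i+5]*b[i+5] + a[i+6]*b[i+6] + a[i+7]*b[i+7];
        return val;
      }
      public static void main(String[] args) {
        double[] a = new double[13], b = new double[13];
        for (int i = 0; i < a.length; i++) { a[i] = i; b[i] = 2; }
        System.out.println(dot(a, b, a.length)); //2*(0+1+...+12) = 156.0
      }
    }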

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicCompressionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicCompressionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicCompressionTest.java
index 2ec2f61..2d1b592 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicCompressionTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicCompressionTest.java
@@ -44,9 +44,10 @@ public class BasicCompressionTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -70,13 +71,23 @@ public class BasicCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -105,13 +116,13 @@ public class BasicCompressionTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runCompressionTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -144,8 +155,10 @@ public class BasicCompressionTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -164,5 +177,8 @@ public class BasicCompressionTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }
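
The test changes above, and the analogous ones in the remaining test classes below, all follow one pattern: the static DDC flag is set according to the value type before compression and restored in a finally block so later tests see the default again. Condensed to its core (a sketch, reusing the names from the diff):

    try {
        //enable DDC encoding only for the DDC-specific value type
        CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype == ValueType.RAND_ROUND_DDC);
        //... generate input, compress, and compare against the uncompressed result ...
    }
    finally {
        //restore the global default so subsequent tests are unaffected
        CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
    }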

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicGetValueTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicGetValueTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicGetValueTest.java
index 0515acb..47c9fcc 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicGetValueTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicGetValueTest.java
@@ -46,9 +46,10 @@ public class BasicGetValueTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -72,13 +73,23 @@ public class BasicGetValueTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runGetValueTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runGetValueTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runGetValueTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runGetValueTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runGetValueTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runGetValueTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -107,13 +118,13 @@ public class BasicGetValueTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runGetValueTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runGetValueTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runGetValueTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runGetValueTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -146,8 +157,10 @@ public class BasicGetValueTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -166,5 +179,8 @@ public class BasicGetValueTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixAppendTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixAppendTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixAppendTest.java
index 93324b3..3bd6f0c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixAppendTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixAppendTest.java
@@ -45,9 +45,10 @@ public class BasicMatrixAppendTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -71,13 +72,23 @@ public class BasicMatrixAppendTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runMatrixAppendTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runMatrixAppendTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runMatrixAppendTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runMatrixAppendTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runMatrixAppendTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runMatrixAppendTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -106,13 +117,13 @@ public class BasicMatrixAppendTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runMatrixAppendTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runMatrixAppendTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runMatrixAppendTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runMatrixAppendTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -145,8 +156,10 @@ public class BasicMatrixAppendTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols1, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(rows, cols2, 1, 1, 1.0, 3));
@@ -172,5 +185,8 @@ public class BasicMatrixAppendTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixMultChainTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixMultChainTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixMultChainTest.java
index 8f17f91..fe46107 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixMultChainTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixMultChainTest.java
@@ -45,9 +45,10 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -71,13 +72,23 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtXv, true);
+	public void testDenseRoundRandDataOLENoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtXv, true);
+	public void testSparseRoundRandDataOLENoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCNoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCNoWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, true);
 	}
 	
 	@Test
@@ -106,13 +117,23 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtXv, false);
+	public void testDenseRoundRandDataOLENoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, false);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataOLENoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtXv, false);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCNoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtXv, false);
+	public void testSparseRoundRandDataDDCNoWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ChainType.XtXv, false);
 	}
 	
 	@Test
@@ -141,13 +162,23 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtwXv, true);
+	public void testDenseRoundRandDataOLEWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataOLEWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataWeightsCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtwXv, true);
+	public void testDenseRoundRandDataDDCWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ChainType.XtwXv, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCWeightsCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ChainType.XtwXv, true);
 	}
 	
 	@Test
@@ -176,13 +207,13 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND, ChainType.XtwXv, false);
+	public void testDenseRoundRandDataOLEWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataWeightsNoCompression() {
-		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND, ChainType.XtwXv, false);
+	public void testSparseRoundRandDataOLEWeightsNoCompression() {
+		runMatrixMultChainTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ChainType.XtwXv, false);
 	}
 	
 	@Test
@@ -214,8 +245,10 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			MatrixBlock vector1 = DataConverter.convertToMatrixBlock(
 					TestUtils.generateTestMatrix(cols, 1, 0, 1, 1.0, 3));
@@ -241,5 +274,8 @@ public class BasicMatrixMultChainTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/37a215bc/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixTransposeSelfMultTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixTransposeSelfMultTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixTransposeSelfMultTest.java
index ff2a103..c00f25f 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixTransposeSelfMultTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicMatrixTransposeSelfMultTest.java
@@ -45,9 +45,10 @@ public class BasicMatrixTransposeSelfMultTest extends AutomatedTestBase
 	}
 	
 	public enum ValueType {
-		RAND,
-		RAND_ROUND,
-		CONST,
+		RAND, //UC
+		CONST, //RLE
+		RAND_ROUND_OLE, //OLE
+		RAND_ROUND_DDC, //DDC
 	}
 	
 	@Override
@@ -71,13 +72,23 @@ public class BasicMatrixTransposeSelfMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, true);
+	public void testDenseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, true);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, true);
+	public void testSparseRoundRandDataOLECompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, true);
+	}
+	
+	@Test
+	public void testDenseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_DDC, true);
+	}
+	
+	@Test
+	public void testSparseRoundRandDataDDCCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, true);
 	}
 	
 	@Test
@@ -106,13 +117,13 @@ public class BasicMatrixTransposeSelfMultTest extends AutomatedTestBase
 	}
 	
 	@Test
-	public void testDenseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND, false);
+	public void testDenseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.DENSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
-	public void testSparseRoundRandDataNoCompression() {
-		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND, false);
+	public void testSparseRoundRandDataOLENoCompression() {
+		runTransposeSelfMatrixMultTest(SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, false);
 	}
 	
 	@Test
@@ -145,8 +156,10 @@ public class BasicMatrixTransposeSelfMultTest extends AutomatedTestBase
 			//generate input data
 			double min = (vtype==ValueType.CONST)? 10 : -10;
 			double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
-			if( vtype==ValueType.RAND_ROUND )
+			if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) {
+				CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC);
 				input = TestUtils.round(input);
+			}
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
 			
 			//compress given matrix block
@@ -168,5 +181,8 @@ public class BasicMatrixTransposeSelfMultTest extends AutomatedTestBase
 		catch(Exception ex) {
 			throw new RuntimeException(ex);
 		}
+		finally {
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
 	}
 }