You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/17 21:31:57 UTC

[7/7] incubator-systemml git commit: [SYSTEMML-766][SYSTEMML-810] Fix missing licenses / unix line delimiters

[SYSTEMML-766][SYSTEMML-810] Fix missing licenses / unix line delimiters

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/da318739
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/da318739
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/da318739

Branch: refs/heads/master
Commit: da3187392cb67e8a5bd3a494fae68b8dfbc0d883
Parents: 2b7fdb2
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Jul 17 14:31:27 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Jul 17 14:31:27 2016 -0700

----------------------------------------------------------------------
 .../runtime/compress/BitmapDecoderOLE.java      |  254 +-
 .../runtime/compress/BitmapDecoderRLE.java      |  234 +-
 .../sysml/runtime/compress/BitmapEncoder.java   |  784 ++---
 .../apache/sysml/runtime/compress/ColGroup.java |  518 ++--
 .../sysml/runtime/compress/ColGroupBitmap.java  |  996 +++----
 .../sysml/runtime/compress/ColGroupOLE.java     | 1268 ++++-----
 .../sysml/runtime/compress/ColGroupRLE.java     | 1234 ++++----
 .../runtime/compress/ColGroupUncompressed.java  |  720 ++---
 .../runtime/compress/CompressedMatrixBlock.java | 2684 +++++++++---------
 .../runtime/compress/PlanningBinPacker.java     |  382 +--
 .../runtime/compress/PlanningCoCodingGroup.java |  206 +-
 .../compress/PlanningGroupMergeAction.java      |  146 +-
 .../runtime/compress/ReaderColumnSelection.java |  128 +-
 .../compress/ReaderColumnSelectionDense.java    |  136 +-
 .../ReaderColumnSelectionDenseSample.java       |  172 +-
 .../compress/ReaderColumnSelectionSparse.java   |  230 +-
 .../runtime/compress/UncompressedBitmap.java    |  202 +-
 .../compress/estim/CompressedSizeEstimator.java |  290 +-
 .../estim/CompressedSizeEstimatorExact.java     |  106 +-
 .../estim/CompressedSizeEstimatorSample.java    | 1534 +++++-----
 .../compress/estim/CompressedSizeInfo.java      |  138 +-
 .../sysml/runtime/compress/utils/DblArray.java  |  182 +-
 .../compress/utils/DblArrayIntListHashMap.java  |  358 +--
 .../compress/utils/DoubleIntListHashMap.java    |  362 +--
 .../compress/utils/LinearAlgebraUtils.java      |  766 ++---
 .../instructions/cp/PlusMultCPInstruction.java  |   19 +
 src/test/scripts/functions/compress/LinregCG.R  |   72 +-
 .../scripts/functions/compress/LinregCG.dml     |   70 +-
 28 files changed, 7105 insertions(+), 7086 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
index fc6e861..539d1f5 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
@@ -1,127 +1,127 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-import java.util.Iterator;
-
-/**
- * General-purpose iterator to decode a compressed OLE bitmap.
- *  
- */
-public final class BitmapDecoderOLE implements Iterator<Integer> 
-{
-	// pointer to the compressed bitmap
-	private int _bmOff;
-	private int _bmLen;
-	private char[] _bmPtr;
-
-	// The index of the current block. Block 0 covers bits 1 through 2^16
-	private int _blockIx;
-
-	// The offset where the current block starts within the bitmap.
-	private int _blockStartOffset;
-
-	// The number of offsets in the current block.
-	private int _curBlockSize;
-
-	// The offset <b>in the current block</b> the <b>next</b> element we will
-	// read from the bitmap, or bmPtr.length if we are done.
-	private int _nextBmOffset;
-
-	/**
-	 * Point this object at the beginning of a particular bitmap. After a call
-	 * to this method, the next call to {@link #nextOffset()} will return the
-	 * offset of the first bit in the specified bitmap.
-	 * 
-	 * @param bmPtr
-	 *            pointer to a compressed bitmap
-	 */
-	public BitmapDecoderOLE(char[] bmPtr, int off, int len) {
-		_bmOff = off;
-		_bmLen = len;
-		_bmPtr = bmPtr;
-		_blockIx = 0;
-		_blockStartOffset = 0;
-		_curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
-		if (_curBlockSize < 0) {
-			throw new RuntimeException(String.format(
-					"Negative block size %d at position %d of %s",
-					_curBlockSize, _blockStartOffset, Arrays.toString(bmPtr)));
-		}
-		_nextBmOffset = 0;
-
-		// Advance past any zero-length blocks at the beginning of the array
-		while (_blockStartOffset < _bmLen
-				&& _nextBmOffset >= _curBlockSize) {
-			advanceToNextBlock();
-		}
-	}
-
-	@Override
-	public Integer next() {
-		if( !hasNext() )
-			throw new RuntimeException("No next offset existing.");
-		
-		// Grab the lookahead value Note the +1 in the array indexing; 
-		// the first number in a block is the block size
-		int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset];
-		int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ)
-				+ offsetFromBlockBegin;
-		_nextBmOffset++;
-
-		// Advance to next non-empty block if we reached the end of the block.
-		while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) {
-			advanceToNextBlock();
-		}
-
-		return ret;
-	}
-
-	@Override
-	public boolean hasNext() {
-		return _blockStartOffset < _bmLen;
-	}
-
-	@Override
-	public void remove() {
-		throw new RuntimeException("Not implemented for BitmapDecoderOLE.");
-	}
-
-	/**
-	 * Move forward to the next block. Does not skip empty blocks.
-	 */
-	private void advanceToNextBlock() {
-		_blockStartOffset += (1 + _curBlockSize);
-		_blockIx++;
-		if (_blockStartOffset >= _bmLen) {
-			// Read past last block
-			return;
-		}
-
-		_curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
-		if (_curBlockSize < 0) {
-			throw new RuntimeException(String.format(
-					"Negative block size %d at position %d of %s",
-					_curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr)));
-		}
-		_nextBmOffset = 0;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ *  
+ */
+public final class BitmapDecoderOLE implements Iterator<Integer> 
+{
+	// pointer to the compressed bitmap
+	private int _bmOff;
+	private int _bmLen;
+	private char[] _bmPtr;
+
+	// The index of the current block. Block 0 covers bits 1 through 2^16
+	private int _blockIx;
+
+	// The offset where the current block starts within the bitmap.
+	private int _blockStartOffset;
+
+	// The number of offsets in the current block.
+	private int _curBlockSize;
+
+	// The offset <b>in the current block</b> the <b>next</b> element we will
+	// read from the bitmap, or bmPtr.length if we are done.
+	private int _nextBmOffset;
+
+	/**
+	 * Point this object at the beginning of a particular bitmap. After a call
+	 * to this method, the next call to {@link #nextOffset()} will return the
+	 * offset of the first bit in the specified bitmap.
+	 * 
+	 * @param bmPtr
+	 *            pointer to a compressed bitmap
+	 */
+	public BitmapDecoderOLE(char[] bmPtr, int off, int len) {
+		_bmOff = off;
+		_bmLen = len;
+		_bmPtr = bmPtr;
+		_blockIx = 0;
+		_blockStartOffset = 0;
+		_curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+		if (_curBlockSize < 0) {
+			throw new RuntimeException(String.format(
+					"Negative block size %d at position %d of %s",
+					_curBlockSize, _blockStartOffset, Arrays.toString(bmPtr)));
+		}
+		_nextBmOffset = 0;
+
+		// Advance past any zero-length blocks at the beginning of the array
+		while (_blockStartOffset < _bmLen
+				&& _nextBmOffset >= _curBlockSize) {
+			advanceToNextBlock();
+		}
+	}
+
+	@Override
+	public Integer next() {
+		if( !hasNext() )
+			throw new RuntimeException("No next offset existing.");
+		
+		// Grab the lookahead value Note the +1 in the array indexing; 
+		// the first number in a block is the block size
+		int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset];
+		int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ)
+				+ offsetFromBlockBegin;
+		_nextBmOffset++;
+
+		// Advance to next non-empty block if we reached the end of the block.
+		while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) {
+			advanceToNextBlock();
+		}
+
+		return ret;
+	}
+
+	@Override
+	public boolean hasNext() {
+		return _blockStartOffset < _bmLen;
+	}
+
+	@Override
+	public void remove() {
+		throw new RuntimeException("Not implemented for BitmapDecoderOLE.");
+	}
+
+	/**
+	 * Move forward to the next block. Does not skip empty blocks.
+	 */
+	private void advanceToNextBlock() {
+		_blockStartOffset += (1 + _curBlockSize);
+		_blockIx++;
+		if (_blockStartOffset >= _bmLen) {
+			// Read past last block
+			return;
+		}
+
+		_curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+		if (_curBlockSize < 0) {
+			throw new RuntimeException(String.format(
+					"Negative block size %d at position %d of %s",
+					_curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr)));
+		}
+		_nextBmOffset = 0;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
index 54f24ae..f2e05a4 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
@@ -1,117 +1,117 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Iterator;
-
-/**
- * General-purpose iterator to decode a compressed OLE bitmap.
- * 
- */
-public final class BitmapDecoderRLE implements Iterator<Integer>
-{
-	// pointer to the compressed bitmap
-	private int _bmOff;
-	private int _bmLen;
-	private char[] _bmPtr;
-
-	// The offset of the <b>next</b> element we will read from the bitmap, or
-	// bmPtr.length if we are done.
-	private int _nextBmOffset;
-
-	// The offset in the matrix column of the beginning of the current run
-	private int _runStartOffset;
-
-	// The length of the current run
-	private int _curRunLen;
-
-	// The number of bits that we have returned from the current run.
-	private int _runBitsReturned;
-
-	/**
-	 * Point this object at the beginning of a particular bitmap. After a call
-	 * to this method, the next call to {@link #nextOffset()} will return the
-	 * offset of the first bit in the specified bitmap.
-	 * 
-	 * @param bmPtr
-	 *            pointer to a compressed bitmap
-	 */
-	public BitmapDecoderRLE(char[] bmPtr, int off, int len) {
-		_bmOff = off;
-		_bmLen = len;
-		_bmPtr = bmPtr;
-		_nextBmOffset = 0;
-		_runStartOffset = 0;
-		_curRunLen = 0;
-		_runBitsReturned = 0;
-
-		if (0 == _bmLen) {
-			return; //no runs
-		}
-
-		// Advance to the beginning of the first non-empty run.
-		advanceToNextRun();
-	}
-
-	@Override
-	public Integer next() {
-		if( !hasNext() )
-			throw new RuntimeException("No next offset existing.");
-		
-		// Grab the lookahead value
-		int ret = _runStartOffset + _runBitsReturned;
-
-		_runBitsReturned++;
-
-		// Check for end of run
-		if (_runBitsReturned == _curRunLen) {
-			advanceToNextRun();
-		}
-
-		return ret;
-	}
-
-	@Override
-	public boolean hasNext() {
-		return _runBitsReturned < _curRunLen;
-	}
-	
-	@Override
-	public void remove() {
-		throw new RuntimeException("Not implemented for BitmapDecoderRLE.");
-	}
-	
-	/** Move forward to the next non-empty run. */
-	private void advanceToNextRun() {
-		// While loop needed because some runs are of length 0
-		while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) {
-
-			_runBitsReturned = 0;
-
-			// Read the distance to the next run
-			char delta = _bmPtr[_bmOff + _nextBmOffset];
-
-			// Run length is stored in the next element of the array
-			_runStartOffset += delta + _curRunLen;
-			_curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1];
-			_nextBmOffset += 2;
-		}
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ * 
+ */
+public final class BitmapDecoderRLE implements Iterator<Integer>
+{
+	// pointer to the compressed bitmap
+	private int _bmOff;
+	private int _bmLen;
+	private char[] _bmPtr;
+
+	// The offset of the <b>next</b> element we will read from the bitmap, or
+	// bmPtr.length if we are done.
+	private int _nextBmOffset;
+
+	// The offset in the matrix column of the beginning of the current run
+	private int _runStartOffset;
+
+	// The length of the current run
+	private int _curRunLen;
+
+	// The number of bits that we have returned from the current run.
+	private int _runBitsReturned;
+
+	/**
+	 * Point this object at the beginning of a particular bitmap. After a call
+	 * to this method, the next call to {@link #nextOffset()} will return the
+	 * offset of the first bit in the specified bitmap.
+	 * 
+	 * @param bmPtr
+	 *            pointer to a compressed bitmap
+	 */
+	public BitmapDecoderRLE(char[] bmPtr, int off, int len) {
+		_bmOff = off;
+		_bmLen = len;
+		_bmPtr = bmPtr;
+		_nextBmOffset = 0;
+		_runStartOffset = 0;
+		_curRunLen = 0;
+		_runBitsReturned = 0;
+
+		if (0 == _bmLen) {
+			return; //no runs
+		}
+
+		// Advance to the beginning of the first non-empty run.
+		advanceToNextRun();
+	}
+
+	@Override
+	public Integer next() {
+		if( !hasNext() )
+			throw new RuntimeException("No next offset existing.");
+		
+		// Grab the lookahead value
+		int ret = _runStartOffset + _runBitsReturned;
+
+		_runBitsReturned++;
+
+		// Check for end of run
+		if (_runBitsReturned == _curRunLen) {
+			advanceToNextRun();
+		}
+
+		return ret;
+	}
+
+	@Override
+	public boolean hasNext() {
+		return _runBitsReturned < _curRunLen;
+	}
+	
+	@Override
+	public void remove() {
+		throw new RuntimeException("Not implemented for BitmapDecoderRLE.");
+	}
+	
+	/** Move forward to the next non-empty run. */
+	private void advanceToNextRun() {
+		// While loop needed because some runs are of length 0
+		while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) {
+
+			_runBitsReturned = 0;
+
+			// Read the distance to the next run
+			char delta = _bmPtr[_bmOff + _nextBmOffset];
+
+			// Run length is stored in the next element of the array
+			_runStartOffset += delta + _curRunLen;
+			_curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1];
+			_nextBmOffset += 2;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
index 8a535e1..75ddf9a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
@@ -1,392 +1,392 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.compress.utils.DblArray;
-import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.IntArrayList;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.SparseBlock;
-
-
-/** 
- * Static functions for encoding bitmaps in various ways. 
- * 
- */
-public class BitmapEncoder 
-{
-	/** Size of the blocks used in a blocked bitmap representation. */
-	public static final int BITMAP_BLOCK_SZ = 65536;
-	
-	/**
-	 * Generate uncompressed bitmaps for a set of columns in an uncompressed
-	 * matrix block.
-	 * 
-	 * @param colIndices
-	 *            indexes (within the block) of the columns to extract
-	 * @param rawblock
-	 *            an uncompressed matrix block; can be dense or sparse
-	 * @return uncompressed bitmap representation of the columns
-	 * @throws DMLRuntimeException 
-	 */
-	public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) 
-	{
-		//note: no sparse column selection reader because low potential
-		//single column selection
-		if( colIndices.length==1 ) {
-			return extractBitmap(colIndices[0], rawblock, 
-					!CompressedMatrixBlock.MATERIALIZE_ZEROS);
-		}
-		//multiple column selection	(general case)
-		else { 
-			ReaderColumnSelection reader = null;
-			if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT )
-				reader = new ReaderColumnSelectionSparse(rawblock, colIndices, 
-						!CompressedMatrixBlock.MATERIALIZE_ZEROS);
-			else
-				reader = new ReaderColumnSelectionDense(rawblock, colIndices,
-						!CompressedMatrixBlock.MATERIALIZE_ZEROS); 
-			
-			return extractBitmap(colIndices, rawblock, reader);
-		}
-	}
-
-	/**
-	 * 
-	 * @param colIndices
-	 * @param rawblock
-	 * @param sampleIndexes
-	 * @return
-	 */
-	public static UncompressedBitmap extractBitmapFromSample(int[] colIndices,
-			MatrixBlock rawblock, int[] sampleIndexes) 
-	{
-		//note: no sparse column selection reader because low potential
-		
-		//single column selection
-		if( colIndices.length==1 ) {
-			return extractBitmap(colIndices[0], rawblock, sampleIndexes,
-					!CompressedMatrixBlock.MATERIALIZE_ZEROS);
-		}
-		//multiple column selection	(general case)
-		else {			
-			return extractBitmap(colIndices, rawblock,
-					new ReaderColumnSelectionDenseSample(rawblock, colIndices,
-							sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS));	
-		}
-	}
-
-	/**
-	 * Encodes the bitmap as a series of run lengths and offsets.
-	 * <p>
-	 * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE}
-	 * !</b>
-	 * 
-	 * @param offsets
-	 *            uncompressed contents of the bitmap, expressed as a list of
-	 *            the offsets of different bits
-	 * @return compressed version of said bitmap
-	 */
-	public static char[] genRLEBitmap(int[] offsets) {
-		if( offsets.length == 0 )
-			return new char[0]; //empty list
-
-		// Use an ArrayList for correctness at the expense of temp space
-		ArrayList<Character> buf = new ArrayList<Character>();
-
-		// 1 + (position of last 1 in the previous run of 1's)
-		// We add 1 because runs may be of length zero.
-		int lastRunEnd = 0;
-
-		// Offset between the end of the previous run of 1's and the first 1 in
-		// the current run. Initialized below.
-		int curRunOff;
-
-		// Length of the most recent run of 1's
-		int curRunLen = 0;
-
-		// Current encoding is as follows:
-		// Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
-		// Positive entry: Entry encodes offset to next run of 1's. The next
-		// entry in the bitmap holds a run length.
-
-		// Special-case the first run to simplify the loop below.
-		int firstOff = offsets[0];
-
-		// The first run may start more than a short's worth of bits in
-		while (firstOff > Character.MAX_VALUE) {
-			buf.add(Character.MAX_VALUE);
-			buf.add((char) 0);
-			firstOff -= Character.MAX_VALUE;
-			lastRunEnd += Character.MAX_VALUE;
-		}
-
-		// Create the first run with an initial size of 1
-		curRunOff = firstOff;
-		curRunLen = 1;
-
-		// Process the remaining offsets
-		for (int i = 1; i < offsets.length; i++) {
-
-			int absOffset = offsets[i];
-
-			// 1 + (last position in run)
-			int curRunEnd = lastRunEnd + curRunOff + curRunLen;
-
-			if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
-				// End of a run, either because we hit a run of 0's or because the 
-				// number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
-				buf.add((char) curRunOff);
-				buf.add((char) curRunLen);
-
-				lastRunEnd = curRunEnd;
-				curRunOff = absOffset - lastRunEnd;
-
-				while (curRunOff > Character.MAX_VALUE) {
-					// SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
-					// Add zero-length runs until the offset is small enough.
-					buf.add(Character.MAX_VALUE);
-					buf.add((char) 0);
-					lastRunEnd += Character.MAX_VALUE;
-					curRunOff -= Character.MAX_VALUE;
-				}
-				
-				curRunLen = 1;
-			} else {
-				// Middle of a run
-				curRunLen++;
-			}
-		}
-
-		// Close out the last run
-		if (curRunLen >= 1) {
-			buf.add((char) curRunOff);
-			buf.add((char) curRunLen);
-		}
-
-		// Convert wasteful ArrayList to packed array.
-		char[] ret = new char[buf.size()];
-		for (int i = 0; i < buf.size(); i++) {
-			ret[i] = buf.get(i);
-		}
-		return ret;
-	}
-
-	/**
-	 * Encodes the bitmap in blocks of offsets. Within each block, the bits are
-	 * stored as absolute offsets from the start of the block.
-	 * 
-	 * @param offsets
-	 *            uncompressed contents of the bitmap, expressed as a list of
-	 *            the offsets of different bits
-	 * @return compressed version of said bitmap
-	 */
-	public static char[] genOffsetBitmap(int[] offsets) 
-	{
-		int lastOffset = offsets[offsets.length - 1];
-
-		// Build up the blocks
-		int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
-		// To simplify the logic, we make two passes.
-		// The first pass divides the offsets by block.
-		int[] blockLengths = new int[numBlocks];
-		Arrays.fill(blockLengths, 0);
-
-		for (int ix = 0; ix < offsets.length; ix++) {
-			int val = offsets[ix];
-			int blockForVal = val / BITMAP_BLOCK_SZ;
-
-			blockLengths[blockForVal]++;
-		}
-
-		// The second pass creates the blocks.
-		int totalSize = numBlocks;
-		for (int block = 0; block < numBlocks; block++) {
-			totalSize += blockLengths[block];
-		}
-		char[] encodedBlocks = new char[totalSize];
-
-		int inputIx = 0;
-		int blockStartIx = 0;
-		for (int block = 0; block < numBlocks; block++) {
-			int blockSz = blockLengths[block];
-
-			// First entry in the block is number of bits
-			encodedBlocks[blockStartIx] = (char) blockSz;
-
-			for (int i = 0; i < blockSz; i++) {
-				encodedBlocks[blockStartIx + i + 1] = (char) 
-						(offsets[inputIx+i] % BITMAP_BLOCK_SZ);
-			}
-
-			inputIx += blockSz;
-			blockStartIx += blockSz + 1;
-		}
-
-		return encodedBlocks;
-	}
-
-	/**
-	 * 
-	 * @param colIndex
-	 * @param rawblock
-	 * @return
-	 * @throws DMLRuntimeException 
-	 */
-	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) 
-	{
-		//probe map for distinct items (for value or value groups)
-		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
-		
-		//scan rows and probe/build distinct items
-		final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ?
-				rawblock.getNumColumns():rawblock.getNumRows();
-		
-		if( rawblock.isInSparseFormat() //SPARSE 
-			&& CompressedMatrixBlock.TRANSPOSE_INPUT )	
-		{
-			SparseBlock a = rawblock.getSparseBlock();
-			if( a != null && !a.isEmpty(colIndex) ) 
-			{
-				int apos = a.pos(colIndex);
-				int alen = a.size(colIndex);
-				int[] aix = a.indexes(colIndex);
-				double[] avals = a.values(colIndex);
-				
-				IntArrayList lstPtr0 = new IntArrayList(); //for 0 values
-				int last = -1;
-				//iterate over non-zero entries but fill in zeros
-				for( int j=apos; j<apos+alen; j++ ) 
-				{
-					//fill in zero values
-					if( !skipZeros )
-						for( int k=last+1; k<aix[j]; k++ ) 
-							lstPtr0.appendValue(k);
-					//handle non-zero value
-					IntArrayList lstPtr = distinctVals.get(avals[j]);	
-					if( lstPtr == null ) {
-						lstPtr = new IntArrayList();
-						distinctVals.appendValue(avals[j], lstPtr);
-					}
-					lstPtr.appendValue(aix[j]);
-					last = aix[j];
-				}
-				//fill in remaining zero values
-				if( !skipZeros ) {
-					for( int k=last+1; k<m; k++ )
-						lstPtr0.appendValue(k);
-					if( lstPtr0.size()>0 )
-						distinctVals.appendValue(0, lstPtr0);
-				}
-			}
-			else if( !skipZeros ) { //full 0 column 
-				IntArrayList lstPtr = new IntArrayList();
-				for( int i=0; i<m; i++ )
-					lstPtr.appendValue(i);
-				distinctVals.appendValue(0, lstPtr);
-			}
-		}
-		else //GENERAL CASE
-		{
-			for( int i=0; i<m; i++ ) {
-				double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
-						rawblock.quickGetValue(colIndex, i):
-						rawblock.quickGetValue(i, colIndex);
-				if( val!=0 || !skipZeros ) {		
-					IntArrayList lstPtr = distinctVals.get(val);	
-					if( lstPtr == null ) {
-						lstPtr = new IntArrayList();
-						distinctVals.appendValue(val, lstPtr);
-					}
-					lstPtr.appendValue(i);
-				}
-			}
-		}
-		
-		return new UncompressedBitmap(distinctVals);
-	}
-	
-	/**
-	 * 
-	 * @param colIndex
-	 * @param rawblock
-	 * @param sampleIndexes
-	 * @return
-	 */
-	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) 
-	{
-		//note: general case only because anyway binary search for small samples
-		
-		//probe map for distinct items (for value or value groups)
-		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
-		
-		//scan rows and probe/build distinct items
-		final int m = sampleIndexes.length;
-		for( int i=0; i<m; i++ ) {
-			int rowIndex = sampleIndexes[i]; 
-			double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
-					rawblock.quickGetValue(colIndex, rowIndex) : 
-					rawblock.quickGetValue(rowIndex, colIndex); 
-			if( val!=0 || !skipZeros ) {					
-				IntArrayList lstPtr = distinctVals.get(val);	
-				if( lstPtr == null ) {
-					lstPtr = new IntArrayList();
-					distinctVals.appendValue(val, lstPtr);
-				}
-				lstPtr.appendValue(i);
-			}
-		}
-
-		return new UncompressedBitmap(distinctVals);
-	}
-	
-	/**
-	 * 
-	 * @param colIndices
-	 * @param rawblock
-	 * @param rowReader
-	 * @return
-	 */
-	private static UncompressedBitmap extractBitmap(int[] colIndices,
-			MatrixBlock rawblock, ReaderColumnSelection rowReader) 
-	{
-		//probe map for distinct items (for value or value groups)
-		DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
-		
-		//scan rows and probe/build distinct items
-		DblArray cellVals = null;
-		while ((cellVals = rowReader.nextRow()) != null) {
-			IntArrayList lstPtr = distinctVals.get(cellVals);
-			if (lstPtr == null) {
-				//create new objects only on demand
-				lstPtr = new IntArrayList();
-				distinctVals.appendValue(new DblArray(cellVals), lstPtr);
-			}
-			lstPtr.appendValue(rowReader.getCurrentRowIndex());
-		}
-		
-		return new UncompressedBitmap(distinctVals, colIndices.length);
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.IntArrayList;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+
+/** 
+ * Static functions for encoding bitmaps in various ways. 
+ * 
+ */
+public class BitmapEncoder 
+{
+	/** Size of the blocks used in a blocked bitmap representation. */
+	public static final int BITMAP_BLOCK_SZ = 65536;
+	
+	/**
+	 * Generate uncompressed bitmaps for a set of columns in an uncompressed
+	 * matrix block.
+	 * 
+	 * @param colIndices
+	 *            indexes (within the block) of the columns to extract
+	 * @param rawblock
+	 *            an uncompressed matrix block; can be dense or sparse
+	 * @return uncompressed bitmap representation of the columns
+	 * @throws DMLRuntimeException 
+	 */
+	public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) 
+	{
+		//note: no sparse column selection reader because low potential
+		//single column selection
+		if( colIndices.length==1 ) {
+			return extractBitmap(colIndices[0], rawblock, 
+					!CompressedMatrixBlock.MATERIALIZE_ZEROS);
+		}
+		//multiple column selection	(general case)
+		else { 
+			ReaderColumnSelection reader = null;
+			if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT )
+				reader = new ReaderColumnSelectionSparse(rawblock, colIndices, 
+						!CompressedMatrixBlock.MATERIALIZE_ZEROS);
+			else
+				reader = new ReaderColumnSelectionDense(rawblock, colIndices,
+						!CompressedMatrixBlock.MATERIALIZE_ZEROS); 
+			
+			return extractBitmap(colIndices, rawblock, reader);
+		}
+	}
+
+	/**
+	 * 
+	 * @param colIndices
+	 * @param rawblock
+	 * @param sampleIndexes
+	 * @return
+	 */
+	public static UncompressedBitmap extractBitmapFromSample(int[] colIndices,
+			MatrixBlock rawblock, int[] sampleIndexes) 
+	{
+		//note: no sparse column selection reader because low potential
+		
+		//single column selection
+		if( colIndices.length==1 ) {
+			return extractBitmap(colIndices[0], rawblock, sampleIndexes,
+					!CompressedMatrixBlock.MATERIALIZE_ZEROS);
+		}
+		//multiple column selection	(general case)
+		else {			
+			return extractBitmap(colIndices, rawblock,
+					new ReaderColumnSelectionDenseSample(rawblock, colIndices,
+							sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS));	
+		}
+	}
+
+	/**
+	 * Encodes the bitmap as a series of run lengths and offsets.
+	 * <p>
+	 * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE}
+	 * !</b>
+	 * 
+	 * @param offsets
+	 *            uncompressed contents of the bitmap, expressed as a list of
+	 *            the offsets of different bits
+	 * @return compressed version of said bitmap
+	 */
+	public static char[] genRLEBitmap(int[] offsets) {
+		if( offsets.length == 0 )
+			return new char[0]; //empty list
+
+		// Use an ArrayList for correctness at the expense of temp space
+		ArrayList<Character> buf = new ArrayList<Character>();
+
+		// 1 + (position of last 1 in the previous run of 1's)
+		// We add 1 because runs may be of length zero.
+		int lastRunEnd = 0;
+
+		// Offset between the end of the previous run of 1's and the first 1 in
+		// the current run. Initialized below.
+		int curRunOff;
+
+		// Length of the most recent run of 1's
+		int curRunLen = 0;
+
+		// Current encoding is as follows:
+		// Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
+		// Positive entry: Entry encodes offset to next run of 1's. The next
+		// entry in the bitmap holds a run length.
+
+		// Special-case the first run to simplify the loop below.
+		int firstOff = offsets[0];
+
+		// The first run may start more than a short's worth of bits in
+		while (firstOff > Character.MAX_VALUE) {
+			buf.add(Character.MAX_VALUE);
+			buf.add((char) 0);
+			firstOff -= Character.MAX_VALUE;
+			lastRunEnd += Character.MAX_VALUE;
+		}
+
+		// Create the first run with an initial size of 1
+		curRunOff = firstOff;
+		curRunLen = 1;
+
+		// Process the remaining offsets
+		for (int i = 1; i < offsets.length; i++) {
+
+			int absOffset = offsets[i];
+
+			// 1 + (last position in run)
+			int curRunEnd = lastRunEnd + curRunOff + curRunLen;
+
+			if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
+				// End of a run, either because we hit a run of 0's or because the 
+				// number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
+				buf.add((char) curRunOff);
+				buf.add((char) curRunLen);
+
+				lastRunEnd = curRunEnd;
+				curRunOff = absOffset - lastRunEnd;
+
+				while (curRunOff > Character.MAX_VALUE) {
+					// SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
+					// Add zero-length runs until the offset is small enough.
+					buf.add(Character.MAX_VALUE);
+					buf.add((char) 0);
+					lastRunEnd += Character.MAX_VALUE;
+					curRunOff -= Character.MAX_VALUE;
+				}
+				
+				curRunLen = 1;
+			} else {
+				// Middle of a run
+				curRunLen++;
+			}
+		}
+
+		// Close out the last run
+		if (curRunLen >= 1) {
+			buf.add((char) curRunOff);
+			buf.add((char) curRunLen);
+		}
+
+		// Convert wasteful ArrayList to packed array.
+		char[] ret = new char[buf.size()];
+		for (int i = 0; i < buf.size(); i++) {
+			ret[i] = buf.get(i);
+		}
+		return ret;
+	}
+
+	/**
+	 * Encodes the bitmap in blocks of offsets. Within each block, the bits are
+	 * stored as absolute offsets from the start of the block.
+	 * 
+	 * @param offsets
+	 *            uncompressed contents of the bitmap, expressed as a list of
+	 *            the offsets of different bits
+	 * @return compressed version of said bitmap
+	 */
+	public static char[] genOffsetBitmap(int[] offsets) 
+	{
+		int lastOffset = offsets[offsets.length - 1];
+
+		// Build up the blocks
+		int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
+		// To simplify the logic, we make two passes.
+		// The first pass divides the offsets by block.
+		int[] blockLengths = new int[numBlocks];
+		Arrays.fill(blockLengths, 0);
+
+		for (int ix = 0; ix < offsets.length; ix++) {
+			int val = offsets[ix];
+			int blockForVal = val / BITMAP_BLOCK_SZ;
+
+			blockLengths[blockForVal]++;
+		}
+
+		// The second pass creates the blocks.
+		int totalSize = numBlocks;
+		for (int block = 0; block < numBlocks; block++) {
+			totalSize += blockLengths[block];
+		}
+		char[] encodedBlocks = new char[totalSize];
+
+		int inputIx = 0;
+		int blockStartIx = 0;
+		for (int block = 0; block < numBlocks; block++) {
+			int blockSz = blockLengths[block];
+
+			// First entry in the block is number of bits
+			encodedBlocks[blockStartIx] = (char) blockSz;
+
+			for (int i = 0; i < blockSz; i++) {
+				encodedBlocks[blockStartIx + i + 1] = (char) 
+						(offsets[inputIx+i] % BITMAP_BLOCK_SZ);
+			}
+
+			inputIx += blockSz;
+			blockStartIx += blockSz + 1;
+		}
+
+		return encodedBlocks;
+	}
+
+	/**
+	 * 
+	 * @param colIndex
+	 * @param rawblock
+	 * @return
+	 * @throws DMLRuntimeException 
+	 */
+	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) 
+	{
+		//probe map for distinct items (for value or value groups)
+		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+		
+		//scan rows and probe/build distinct items
+		final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+				rawblock.getNumColumns():rawblock.getNumRows();
+		
+		if( rawblock.isInSparseFormat() //SPARSE 
+			&& CompressedMatrixBlock.TRANSPOSE_INPUT )	
+		{
+			SparseBlock a = rawblock.getSparseBlock();
+			if( a != null && !a.isEmpty(colIndex) ) 
+			{
+				int apos = a.pos(colIndex);
+				int alen = a.size(colIndex);
+				int[] aix = a.indexes(colIndex);
+				double[] avals = a.values(colIndex);
+				
+				IntArrayList lstPtr0 = new IntArrayList(); //for 0 values
+				int last = -1;
+				//iterate over non-zero entries but fill in zeros
+				for( int j=apos; j<apos+alen; j++ ) 
+				{
+					//fill in zero values
+					if( !skipZeros )
+						for( int k=last+1; k<aix[j]; k++ ) 
+							lstPtr0.appendValue(k);
+					//handle non-zero value
+					IntArrayList lstPtr = distinctVals.get(avals[j]);	
+					if( lstPtr == null ) {
+						lstPtr = new IntArrayList();
+						distinctVals.appendValue(avals[j], lstPtr);
+					}
+					lstPtr.appendValue(aix[j]);
+					last = aix[j];
+				}
+				//fill in remaining zero values
+				if( !skipZeros ) {
+					for( int k=last+1; k<m; k++ )
+						lstPtr0.appendValue(k);
+					if( lstPtr0.size()>0 )
+						distinctVals.appendValue(0, lstPtr0);
+				}
+			}
+			else if( !skipZeros ) { //full 0 column 
+				IntArrayList lstPtr = new IntArrayList();
+				for( int i=0; i<m; i++ )
+					lstPtr.appendValue(i);
+				distinctVals.appendValue(0, lstPtr);
+			}
+		}
+		else //GENERAL CASE
+		{
+			for( int i=0; i<m; i++ ) {
+				double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+						rawblock.quickGetValue(colIndex, i):
+						rawblock.quickGetValue(i, colIndex);
+				if( val!=0 || !skipZeros ) {		
+					IntArrayList lstPtr = distinctVals.get(val);	
+					if( lstPtr == null ) {
+						lstPtr = new IntArrayList();
+						distinctVals.appendValue(val, lstPtr);
+					}
+					lstPtr.appendValue(i);
+				}
+			}
+		}
+		
+		return new UncompressedBitmap(distinctVals);
+	}
+	
+	/**
+	 * 
+	 * @param colIndex
+	 * @param rawblock
+	 * @param sampleIndexes
+	 * @return
+	 */
+	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) 
+	{
+		//note: general case only because anyway binary search for small samples
+		
+		//probe map for distinct items (for value or value groups)
+		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+		
+		//scan rows and probe/build distinct items
+		final int m = sampleIndexes.length;
+		for( int i=0; i<m; i++ ) {
+			int rowIndex = sampleIndexes[i]; 
+			double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+					rawblock.quickGetValue(colIndex, rowIndex) : 
+					rawblock.quickGetValue(rowIndex, colIndex); 
+			if( val!=0 || !skipZeros ) {					
+				IntArrayList lstPtr = distinctVals.get(val);	
+				if( lstPtr == null ) {
+					lstPtr = new IntArrayList();
+					distinctVals.appendValue(val, lstPtr);
+				}
+				lstPtr.appendValue(i);
+			}
+		}
+
+		return new UncompressedBitmap(distinctVals);
+	}
+	
+	/**
+	 * 
+	 * @param colIndices
+	 * @param rawblock
+	 * @param rowReader
+	 * @return
+	 */
+	private static UncompressedBitmap extractBitmap(int[] colIndices,
+			MatrixBlock rawblock, ReaderColumnSelection rowReader) 
+	{
+		//probe map for distinct items (for value or value groups)
+		DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
+		
+		//scan rows and probe/build distinct items
+		DblArray cellVals = null;
+		while ((cellVals = rowReader.nextRow()) != null) {
+			IntArrayList lstPtr = distinctVals.get(cellVals);
+			if (lstPtr == null) {
+				//create new objects only on demand
+				lstPtr = new IntArrayList();
+				distinctVals.appendValue(new DblArray(cellVals), lstPtr);
+			}
+			lstPtr.appendValue(rowReader.getCurrentRowIndex());
+		}
+		
+		return new UncompressedBitmap(distinctVals, colIndices.length);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index 647a701..b1072d1 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -1,259 +1,259 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
-import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
-
-/**
- * Class that stores information about a column group within a compressed matrix
- * block. There are subclasses specific to each compression type.
- * 
- */
-public abstract class ColGroup implements Serializable 
-{
-	private static final long serialVersionUID = 2439785418908671481L;
-
-	public enum CompressionType  {
-		UNCOMPRESSED,   //uncompressed sparse/dense 
-		RLE_BITMAP,     //RLE bitmap
-		OLE_BITMAP;  //OLE bitmap
-	}
-	
-	/**
-	 * Offsets of the columns that make up the column group. Zero-based, and
-	 * relative to the matrix block.
-	 */
-	protected int[] _colIndexes;
-
-	/** Number of rows in the matrix, for use by child classes. */
-	protected int _numRows;
-
-	/** How the elements of the column group are compressed. */
-	private CompressionType _compType;
-
-	
-	/**
-	 * Main constructor.
-	 * 
-	 * @param colIndices
-	 *            offsets of the columns in the matrix block that make up the
-	 *            group
-	 * @param numRows
-	 *            total number of rows in the parent block
-	 */
-	protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
-		_compType = type;
-		_colIndexes = colIndices;
-		_numRows = numRows;
-	}
-
-	/** Convenience constructor for converting indices to a more compact format. */
-	protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
-		_compType = type;
-		_colIndexes = new int[colIndicesList.size()];
-		int i = 0;
-		for (Integer index : colIndicesList)
-			_colIndexes[i++] = index;
-	}
-
-	/**
-	 * @return offsets of the columns in the matrix block that make up the group
-	 */
-	public int[] getColIndices() {
-		return _colIndexes;
-	}
-
-	/**
-	 * @param col
-	 *            an index from 0 to the number of columns in this group - 1
-	 * @return offset of the specified column in the matrix block that make up
-	 *         the group
-	 */
-	public int getColIndex(int colNum) {
-		return _colIndexes[colNum];
-	}
-
-	public int getNumRows() {
-		return _numRows;
-	}
-	
-	/**
-	 * @return number of columns in this column group
-	 */
-	public int getNumCols() {
-		return _colIndexes.length;
-	}
-
-	/**
-	 * @return How the elements of the column group are compressed.
-	 */
-	public CompressionType getCompType() {
-		return _compType;
-	}
-
-	/**
-	 * 
-	 * @param offset
-	 */
-	public void shiftColIndices(int offset)  {
-		for( int i=0; i<_colIndexes.length; i++ )
-			_colIndexes[i] += offset;
-	}
-	
-	/**
-	 * Note: Must be overridden by child classes to account for additional data
-	 * and metadata
-	 * 
-	 * @return an upper bound on the number of bytes used to store this ColGroup
-	 *         in memory.
-	 */
-	public long estimateInMemorySize() {
-		// int numRows (4B) , array reference colIndices (8B) + array object
-		// overhead if exists (32B) + 4B per element, CompressionType compType
-		// (2 booleans 2B + enum overhead 32B + reference to enum 8B)
-		long size = 54;
-		if (_colIndexes == null)
-			return size;
-		else
-			return size + 32 + 4 * _colIndexes.length;
-	}
-
-	/**
-	 * Decompress the contents of this column group into the specified full
-	 * matrix block.
-	 * 
-	 * @param target
-	 *            a matrix block where the columns covered by this column group
-	 *            have not yet been filled in.
-	 */
-	public abstract void decompressToBlock(MatrixBlock target);
-
-	/**
-	 * Decompress the contents of this column group into uncompressed packed
-	 * columns
-	 * 
-	 * @param target
-	 *            a dense matrix block. The block must have enough space to hold
-	 *            the contents of this column group.
-	 * @param colIndexTargets
-	 *            array that maps column indices in the original matrix block to
-	 *            columns of target.
-	 */
-	public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
-
-	/**
-	 * 
-	 * @param target  dense output vector
-	 * @param colpos  column to decompress, error if larger or equal numCols
-	 */
-	public abstract void decompressToBlock(MatrixBlock target, int colpos);
-
-
-	/**
-	 * Serializes column group to data output.
-	 * 
-	 * @param out
-	 * @throws IOException
-	 */
-	public abstract void write(DataOutput out) 
-		throws IOException;
-	
-	/**
-	 * Deserializes column group from data input.
-	 * 
-	 * @param in
-	 * @throws IOException
-	 */
-	public abstract void readFields(DataInput in) 
-		throws IOException;
-		
-	
-	/**
-	 * Returns the exact serialized size of column group.
-	 * This can be used for example for buffer preallocation.
-	 * 
-	 * @return
-	 */
-	public abstract long getExactSizeOnDisk();
-	
-	/**
-	 * Multiply the slice of the matrix that this column group represents by a
-	 * vector on the right.
-	 * 
-	 * @param vector
-	 *            vector to multiply by (tall vector)
-	 * @param result
-	 *            accumulator for holding the result
-	 * @throws DMLRuntimeException
-	 *             if the internal SystemML code that performs the
-	 *             multiplication experiences an error
-	 */
-	public abstract void rightMultByVector(MatrixBlock vector,
-			MatrixBlock result, int rl, int ru) throws DMLRuntimeException;
-
-
-	/**
-	 * Multiply the slice of the matrix that this column group represents by a
-	 * row vector on the left (the original column vector is assumed to be
-	 * transposed already i.e. its size now is 1xn).
-	 * 
-	 * @param vector
-	 * @param result
-	 * @throws DMLRuntimeException 
-	 */
-	public abstract void leftMultByRowVector(MatrixBlock vector,
-			MatrixBlock result) throws DMLRuntimeException;
-
-	/**
-	 * Perform the specified scalar operation directly on the compressed column
-	 * group, without decompressing individual cells if possible.
-	 * 
-	 * @param op
-	 *            operation to perform
-	 * @return version of this column group with the operation applied
-	 */
-	public abstract ColGroup scalarOperation(ScalarOperator op)
-			throws DMLRuntimeException;
-
-	/**
-	 * 
-	 * @param op
-	 * @param result
-	 * @throws DMLUnsupportedOperationException
-	 * @throws DMLRuntimeException
-	 */
-	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
-		throws DMLRuntimeException;
-	
-	/**
-	 * 
-	 * @param rnnz
-	 */
-	protected abstract void countNonZerosPerRow(int[] rnnz);
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class that stores information about a column group within a compressed matrix
+ * block. There are subclasses specific to each compression type.
+ * 
+ */
+public abstract class ColGroup implements Serializable 
+{
+	private static final long serialVersionUID = 2439785418908671481L;
+
+	public enum CompressionType  {
+		UNCOMPRESSED,   //uncompressed sparse/dense 
+		RLE_BITMAP,     //RLE bitmap
+		OLE_BITMAP;  //OLE bitmap
+	}
+	
+	/**
+	 * Offsets of the columns that make up the column group. Zero-based, and
+	 * relative to the matrix block.
+	 */
+	protected int[] _colIndexes;
+
+	/** Number of rows in the matrix, for use by child classes. */
+	protected int _numRows;
+
+	/** How the elements of the column group are compressed. */
+	private CompressionType _compType;
+
+	
+	/**
+	 * Main constructor.
+	 * 
+	 * @param colIndices
+	 *            offsets of the columns in the matrix block that make up the
+	 *            group
+	 * @param numRows
+	 *            total number of rows in the parent block
+	 */
+	protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
+		_compType = type;
+		_colIndexes = colIndices;
+		_numRows = numRows;
+	}
+
+	/** Convenience constructor for converting indices to a more compact format. */
+	protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
+		_compType = type;
+		_colIndexes = new int[colIndicesList.size()];
+		int i = 0;
+		for (Integer index : colIndicesList)
+			_colIndexes[i++] = index;
+	}
+
+	/**
+	 * @return offsets of the columns in the matrix block that make up the group
+	 */
+	public int[] getColIndices() {
+		return _colIndexes;
+	}
+
+	/**
+	 * @param col
+	 *            an index from 0 to the number of columns in this group - 1
+	 * @return offset of the specified column in the matrix block that make up
+	 *         the group
+	 */
+	public int getColIndex(int colNum) {
+		return _colIndexes[colNum];
+	}
+
+	public int getNumRows() {
+		return _numRows;
+	}
+	
+	/**
+	 * @return number of columns in this column group
+	 */
+	public int getNumCols() {
+		return _colIndexes.length;
+	}
+
+	/**
+	 * @return How the elements of the column group are compressed.
+	 */
+	public CompressionType getCompType() {
+		return _compType;
+	}
+
+	/**
+	 * 
+	 * @param offset
+	 */
+	public void shiftColIndices(int offset)  {
+		for( int i=0; i<_colIndexes.length; i++ )
+			_colIndexes[i] += offset;
+	}
+	
+	/**
+	 * Note: Must be overridden by child classes to account for additional data
+	 * and metadata
+	 * 
+	 * @return an upper bound on the number of bytes used to store this ColGroup
+	 *         in memory.
+	 */
+	public long estimateInMemorySize() {
+		// int numRows (4B) , array reference colIndices (8B) + array object
+		// overhead if exists (32B) + 4B per element, CompressionType compType
+		// (2 booleans 2B + enum overhead 32B + reference to enum 8B)
+		long size = 54;
+		if (_colIndexes == null)
+			return size;
+		else
+			return size + 32 + 4 * _colIndexes.length;
+	}
+
+	/**
+	 * Decompress the contents of this column group into the specified full
+	 * matrix block.
+	 * 
+	 * @param target
+	 *            a matrix block where the columns covered by this column group
+	 *            have not yet been filled in.
+	 */
+	public abstract void decompressToBlock(MatrixBlock target);
+
+	/**
+	 * Decompress the contents of this column group into uncompressed packed
+	 * columns
+	 * 
+	 * @param target
+	 *            a dense matrix block. The block must have enough space to hold
+	 *            the contents of this column group.
+	 * @param colIndexTargets
+	 *            array that maps column indices in the original matrix block to
+	 *            columns of target.
+	 */
+	public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
+
+	/**
+	 * 
+	 * @param target  dense output vector
+	 * @param colpos  column to decompress, error if larger or equal numCols
+	 */
+	public abstract void decompressToBlock(MatrixBlock target, int colpos);
+
+
+	/**
+	 * Serializes column group to data output.
+	 * 
+	 * @param out
+	 * @throws IOException
+	 */
+	public abstract void write(DataOutput out) 
+		throws IOException;
+	
+	/**
+	 * Deserializes column group from data input.
+	 * 
+	 * @param in
+	 * @throws IOException
+	 */
+	public abstract void readFields(DataInput in) 
+		throws IOException;
+		
+	
+	/**
+	 * Returns the exact serialized size of column group.
+	 * This can be used for example for buffer preallocation.
+	 * 
+	 * @return
+	 */
+	public abstract long getExactSizeOnDisk();
+	
+	/**
+	 * Multiply the slice of the matrix that this column group represents by a
+	 * vector on the right.
+	 * 
+	 * @param vector
+	 *            vector to multiply by (tall vector)
+	 * @param result
+	 *            accumulator for holding the result
+	 * @throws DMLRuntimeException
+	 *             if the internal SystemML code that performs the
+	 *             multiplication experiences an error
+	 */
+	public abstract void rightMultByVector(MatrixBlock vector,
+			MatrixBlock result, int rl, int ru) throws DMLRuntimeException;
+
+
+	/**
+	 * Multiply the slice of the matrix that this column group represents by a
+	 * row vector on the left (the original column vector is assumed to be
+	 * transposed already i.e. its size now is 1xn).
+	 * 
+	 * @param vector
+	 * @param result
+	 * @throws DMLRuntimeException 
+	 */
+	public abstract void leftMultByRowVector(MatrixBlock vector,
+			MatrixBlock result) throws DMLRuntimeException;
+
+	/**
+	 * Perform the specified scalar operation directly on the compressed column
+	 * group, without decompressing individual cells if possible.
+	 * 
+	 * @param op
+	 *            operation to perform
+	 * @return version of this column group with the operation applied
+	 */
+	public abstract ColGroup scalarOperation(ScalarOperator op)
+			throws DMLRuntimeException;
+
+	/**
+	 * 
+	 * @param op
+	 * @param result
+	 * @throws DMLUnsupportedOperationException
+	 * @throws DMLRuntimeException
+	 */
+	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
+		throws DMLRuntimeException;
+	
+	/**
+	 * 
+	 * @param rnnz
+	 */
+	protected abstract void countNonZerosPerRow(int[] rnnz);
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
index 5d60a49..3455cc7 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
@@ -1,498 +1,498 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.TreeMap;
-import java.util.Map.Entry;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
-
-
-/**
- * Base class for column groups encoded with various types of bitmap encoding.
- * 
- * 
- * NOTES:
- *  * OLE: separate storage segment length and bitmaps led to a 30% improvement
- *    but not applied because more difficult to support both data layouts at the
- *    same time (distributed/local as well as w/ and w/o low-level opt)
- */
-public abstract class ColGroupBitmap extends ColGroup 
-{
-	private static final long serialVersionUID = -1635828933479403125L;
-	
-	public static final boolean LOW_LEVEL_OPT = true;	
-	//sorting of values by physical length helps by 10-20%, especially for serial, while
-	//slight performance decrease for parallel incl multi-threaded, hence not applied for
-	//distributed operations (also because compression time + garbage collection increases)
-	private static final boolean SORT_VALUES_BY_LENGTH = true; 
-	protected static final boolean CREATE_SKIPLIST = true;
-	
-	protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	
-	/** Distinct values associated with individual bitmaps. */
-	protected double[] _values; //linearized <numcol vals> <numcol vals>
-
-	/** Bitmaps, one per uncompressed value in {@link #values}. */
-	protected int[] _ptr; //bitmap offsets per value
-	protected char[] _data; //linearized bitmaps (variable length)
-	
-	protected int[] _skiplist;
-	
-	public ColGroupBitmap(CompressionType type) {
-		super(type, (int[]) null, -1);
-	}
-	
-	/**
-	 * Main constructor. Stores the headers for the individual bitmaps.
-	 * 
-	 * @param colIndices
-	 *            indices (within the block) of the columns included in this
-	 *            column
-	 * @param numRows
-	 *            total number of rows in the parent block
-	 * @param ubm
-	 *            Uncompressed bitmap representation of the block
-	 */
-	public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) 
-	{
-		super(type, colIndices, numRows);
-
-		// Extract and store just the distinct values. The bitmaps themselves go
-		// into the subclasses.
-		final int numCols = ubm.getNumColumns();
-		final int numVals = ubm.getNumValues();
-		
-		_values = new double[numVals*numCols];
-		for (int i=0; i<numVals; i++) {
-			//note: deep copied internally on getValues
-			double[] tmp = ubm.getValues(i);
-			System.arraycopy(tmp, 0, _values, i*numCols, numCols);
-		}
-	}
-
-	/**
-	 * Constructor for subclass methods that need to create shallow copies
-	 * 
-	 * @param colIndices
-	 *            raw column index information
-	 * @param numRows
-	 *            number of rows in the block
-	 * @param values
-	 *            set of distinct values for the block (associated bitmaps are
-	 *            kept in the subclass)
-	 */
-	protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) {
-		super(type, colIndices, numRows);
-		_values = values;
-	}
-	
-	protected final int len(int k) {
-		return _ptr[k+1] - _ptr[k];
-	}
-
-	/**
-	 * 
-	 * @param numVals
-	 * @param totalLen
-	 * @param lbitmaps
-	 */
-	protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
-	{
-		// compact bitmaps to linearized representation
-		if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
-			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
-		{
-			// sort value by num segments in descending order
-			TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
-			for( int i=0; i<numVals; i++ ) {
-				int revlen = totalLen-lbitmaps[i].length;
-				if( !tree.containsKey(revlen) )
-					tree.put(revlen, new ArrayList<Integer>());
-				tree.get(revlen).add(i);
-			}
-			
-			// compact bitmaps to linearized representation
-			_ptr = new int[numVals+1];
-			_data = new char[totalLen];
-			int pos = 0, off = 0;
-			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
-				for( Integer tmpix : e.getValue() ) {
-					int len = lbitmaps[tmpix].length;
-					_ptr[pos] = off;
-					System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
-					off += len;
-					pos++;
-				}
-			}
-			_ptr[numVals] = totalLen;
-			
-			// reorder values
-			double[] lvalues = new double[_values.length];
-			int off2 = 0; int numCols = _colIndexes.length;
-			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
-				for( Integer tmpix : e.getValue() ) {
-					System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);				
-					off2 += numCols;
-				}
-			}			
-			_values = lvalues;
-		}
-		else
-		{
-			// compact bitmaps to linearized representation
-			_ptr = new int[numVals+1];
-			_data = new char[totalLen];
-			for( int i=0, off=0; i<numVals; i++ ) {
-				int len = lbitmaps[i].length;
-				_ptr[i] = off;
-				System.arraycopy(lbitmaps[i], 0, _data, off, len);
-				off += len;
-			}
-			_ptr[numVals] = totalLen;
-		}
-	}
-	
-	@Override
-	public long estimateInMemorySize() {
-		long size = super.estimateInMemorySize();
-		
-		// adding the size of values
-		size += 8; //array reference
-		if (_values != null) {
-			size += 32 + _values.length * 8; //values
-		}
-		
-		// adding bitmaps size
-		size += 16; //array references
-		if (_data != null) {
-			size += 32 + _ptr.length * 4; // offsets
-			size += 32 + _data.length * 2;    // bitmaps
-		}
-	
-		return size;
-	}
-
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		int[] colIndices = getColIndices();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				for (int colIx = 0; colIx < numCols; colIx++) {
-					target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
-				}
-			}
-		}
-	}
-
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				for (int colIx = 0; colIx < numCols; colIx++) {
-					int origMatrixColIx = getColIndex(colIx);
-					int targetColIx = colIndexTargets[origMatrixColIx];
-					target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
-				}
-			}
-		}
-	}
-	
-	//generic decompression for OLE/RLE, to be overwritten for performance
-	@Override
-	public void decompressToBlock(MatrixBlock target, int colpos) 
-	{
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		
-		// Run through the bitmaps for this column group
-		for (int i = 0; i < numVals; i++) {
-			Iterator<Integer> decoder = getDecodeIterator(i);
-			int valOff = i*numCols;
-
-			while (decoder.hasNext()) {
-				int row = decoder.next();
-				target.quickSetValue(row, 0, _values[valOff+colpos]);
-			}
-		}
-	}
-	
-	/**
-	 * 
-	 * @param bitmapIx
-	 * @return
-	 */
-	protected double sumValues(int bitmapIx)
-	{
-		final int numCols = getNumCols();
-		final int valOff = bitmapIx * numCols;
-		
-		double val = 0.0;
-		for( int i = 0; i < numCols; i++ ) {
-			val += _values[valOff+i];
-		}
-		
-		return val;
-	}
-	
-	protected double sumValues(int bitmapIx, double[] b)
-	{
-		final int numCols = getNumCols();
-		final int valOff = bitmapIx * numCols;
-		
-		double val = 0;
-		for( int i = 0; i < numCols; i++ ) {
-			val += _values[valOff+i] * b[i];
-		}
-		
-		return val;
-	}
-	
-	/**
-	 * 
-	 * @param b
-	 * @param c
-	 */
-	protected void sumAllValues(double[] b, double[] c)
-	{
-		final int numVals = getNumValues();
-		final int numCols = getNumCols();
-		
-		//vectMultiplyAdd over cols instead of dotProduct over vals because
-		//usually more values than columns
-		for( int i=0, off=0; i<numCols; i++, off+=numVals )
-			LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
-	}
-
-	/**
-	 * Method for use by subclasses. Applies a scalar operation to the value
-	 * metadata stored in the superclass.
-	 * 
-	 * @param op
-	 *            scalar operation to perform
-	 * @return transformed copy of value metadata for this column group
-	 * @throws DMLRuntimeException
-	 */
-	protected double[] applyScalarOp(ScalarOperator op)
-			throws DMLRuntimeException 
-	{
-		//scan over linearized values
-		double[] ret = new double[_values.length];
-		for (int i = 0; i < _values.length; i++) {
-			ret[i] = op.executeScalar(_values[i]);
-		}
-
-		return ret;
-	}
-	
-	/**
-	 * 
-	 * @param op
-	 * @param newVal
-	 * @param numCols
-	 * @return
-	 * @throws DMLRuntimeException
-	 */
-	protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
-			throws DMLRuntimeException 
-	{
-		//scan over linearized values
-		double[] ret = new double[_values.length + numCols];
-		for( int i = 0; i < _values.length; i++ ) {
-			ret[i] = op.executeScalar(_values[i]);
-		}
-		
-		//add new value to the end
-		Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
-		
-		return ret;
-	}
-
-	/**
-	 * @return the number of distinct sets of values associated with the bitmaps
-	 *         in this column group
-	 */
-	public int getNumValues() {
-		return _values.length / _colIndexes.length;
-	}
-
-	/**
-	 * 
-	 * @return
-	 */
-	public double[] getValues() {
-		return _values;
-	}
-	
-	/**
-	 * 
-	 * @return
-	 */
-	public char[] getBitmaps() {
-		return _data;
-	}
-	
-	public int[] getBitmapOffsets() {
-		return _ptr;
-	}
-
-	/**
-	 * @param bmpIx
-	 *            index of a specific compressed bitmap (stored in subclass,
-	 *            index same as {@link #values})
-	 * @return an object for iterating over the row offsets in this bitmap. Only
-	 *         valid until the next call to this method. May be reused across
-	 *         calls.
-	 */
-	public abstract Iterator<Integer> getDecodeIterator(int bmpIx);
-
-	/**
-	 * Utility function of sparse-unsafe operations.
-	 * 
-	 * @param ind
-	 * @return
-	 * @throws DMLRuntimeException
-	 */
-	protected int[] computeOffsets(boolean[] ind)
-		throws DMLRuntimeException 
-	{
-		//determine number of offsets
-		int numOffsets = 0;
-		for( int i=0; i<ind.length; i++ )
-			numOffsets += ind[i] ? 1 : 0;
-		
-		//create offset lists
-		int[] ret = new int[numOffsets];
-		for( int i=0, pos=0; i<ind.length; i++ )
-			if( ind[i] )
-				ret[pos++] = i;
-		
-		return ret;
-	}
-
-	@Override
-	public void readFields(DataInput in) 
-		throws IOException 
-	{
-		_numRows = in.readInt();
-		int numCols = in.readInt();
-		int numVals = in.readInt();
-		
-		//read col indices
-		_colIndexes = new int[ numCols ];
-		for( int i=0; i<numCols; i++ )
-			_colIndexes[i] = in.readInt();
-		
-		//read distinct values
-		_values = new double[numVals*numCols];
-		for( int i=0; i<numVals*numCols; i++ )
-			_values[i] = in.readDouble();
-		
-		//read bitmaps
-		int totalLen = in.readInt();
-		_ptr = new int[numVals+1];
-		_data = new char[totalLen];		
-		for( int i=0, off=0; i<numVals; i++ ) {
-			int len = in.readInt();
-			_ptr[i] = off;
-			for( int j=0; j<len; j++ )
-				_data[off+j] = in.readChar();
-			off += len;
-		}
-		_ptr[numVals] = totalLen;
-	}
-	
-	@Override
-	public void write(DataOutput out) 
-		throws IOException 
-	{
-		int numCols = getNumCols();
-		int numVals = getNumValues();
-		out.writeInt(_numRows);
-		out.writeInt(numCols);
-		out.writeInt(numVals);
-		
-		//write col indices
-		for( int i=0; i<_colIndexes.length; i++ )
-			out.writeInt( _colIndexes[i] );
-		
-		//write distinct values
-		for( int i=0; i<_values.length; i++ )
-			out.writeDouble(_values[i]);
-
-		//write bitmaps (lens and data, offset later recreated)
-		int totalLen = 0;
-		for( int i=0; i<numVals; i++ )
-			totalLen += len(i);
-		out.writeInt(totalLen);	
-		for( int i=0; i<numVals; i++ ) {
-			int len = len(i);
-			int off = _ptr[i];
-			out.writeInt(len);
-			for( int j=0; j<len; j++ )
-				out.writeChar(_data[off+j]);
-		}
-	}
-
-	@Override
-	public long getExactSizeOnDisk() {
-		long ret = 12; //header
-		//col indices
-		ret += 4 * _colIndexes.length; 
-		//distinct values (groups of values)
-		ret += 8 * _values.length;
-		//actual bitmaps
-		ret += 4; //total length
-		for( int i=0; i<getNumValues(); i++ )
-			ret += 4 + 2 * len(i);
-		
-		return ret;
-	}
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+
+/**
+ * Base class for column groups encoded with various types of bitmap encoding.
+ * 
+ * 
+ * NOTES:
+ *  * OLE: separate storage segment length and bitmaps led to a 30% improvement
+ *    but not applied because more difficult to support both data layouts at the
+ *    same time (distributed/local as well as w/ and w/o low-level opt)
+ */
+public abstract class ColGroupBitmap extends ColGroup 
+{
+	private static final long serialVersionUID = -1635828933479403125L;
+	
+	public static final boolean LOW_LEVEL_OPT = true;	
+	//sorting of values by physical length helps by 10-20%, especially for serial, while
+	//slight performance decrease for parallel incl multi-threaded, hence not applied for
+	//distributed operations (also because compression time + garbage collection increases)
+	private static final boolean SORT_VALUES_BY_LENGTH = true; 
+	protected static final boolean CREATE_SKIPLIST = true;
+	
+	protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+	protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+	
+	/** Distinct values associated with individual bitmaps. */
+	protected double[] _values; //linearized <numcol vals> <numcol vals>
+
+	/** Bitmaps, one per uncompressed value in {@link #values}. */
+	protected int[] _ptr; //bitmap offsets per value
+	protected char[] _data; //linearized bitmaps (variable length)
+	
+	protected int[] _skiplist;
+	
+	public ColGroupBitmap(CompressionType type) {
+		super(type, (int[]) null, -1);
+	}
+	
+	/**
+	 * Main constructor. Stores the headers for the individual bitmaps.
+	 * 
+	 * @param colIndices
+	 *            indices (within the block) of the columns included in this
+	 *            column
+	 * @param numRows
+	 *            total number of rows in the parent block
+	 * @param ubm
+	 *            Uncompressed bitmap representation of the block
+	 */
+	public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) 
+	{
+		super(type, colIndices, numRows);
+
+		// Extract and store just the distinct values. The bitmaps themselves go
+		// into the subclasses.
+		final int numCols = ubm.getNumColumns();
+		final int numVals = ubm.getNumValues();
+		
+		_values = new double[numVals*numCols];
+		for (int i=0; i<numVals; i++) {
+			//note: deep copied internally on getValues
+			double[] tmp = ubm.getValues(i);
+			System.arraycopy(tmp, 0, _values, i*numCols, numCols);
+		}
+	}
+
+	/**
+	 * Constructor for subclass methods that need to create shallow copies
+	 * 
+	 * @param colIndices
+	 *            raw column index information
+	 * @param numRows
+	 *            number of rows in the block
+	 * @param values
+	 *            set of distinct values for the block (associated bitmaps are
+	 *            kept in the subclass)
+	 */
+	protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) {
+		super(type, colIndices, numRows);
+		_values = values;
+	}
+	
+	protected final int len(int k) {
+		return _ptr[k+1] - _ptr[k];
+	}
+
+	/**
+	 * 
+	 * @param numVals
+	 * @param totalLen
+	 * @param lbitmaps
+	 */
+	protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
+	{
+		// compact bitmaps to linearized representation
+		if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
+			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
+		{
+			// sort value by num segments in descending order
+			TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
+			for( int i=0; i<numVals; i++ ) {
+				int revlen = totalLen-lbitmaps[i].length;
+				if( !tree.containsKey(revlen) )
+					tree.put(revlen, new ArrayList<Integer>());
+				tree.get(revlen).add(i);
+			}
+			
+			// compact bitmaps to linearized representation
+			_ptr = new int[numVals+1];
+			_data = new char[totalLen];
+			int pos = 0, off = 0;
+			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+				for( Integer tmpix : e.getValue() ) {
+					int len = lbitmaps[tmpix].length;
+					_ptr[pos] = off;
+					System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
+					off += len;
+					pos++;
+				}
+			}
+			_ptr[numVals] = totalLen;
+			
+			// reorder values
+			double[] lvalues = new double[_values.length];
+			int off2 = 0; int numCols = _colIndexes.length;
+			for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+				for( Integer tmpix : e.getValue() ) {
+					System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);				
+					off2 += numCols;
+				}
+			}			
+			_values = lvalues;
+		}
+		else
+		{
+			// compact bitmaps to linearized representation
+			_ptr = new int[numVals+1];
+			_data = new char[totalLen];
+			for( int i=0, off=0; i<numVals; i++ ) {
+				int len = lbitmaps[i].length;
+				_ptr[i] = off;
+				System.arraycopy(lbitmaps[i], 0, _data, off, len);
+				off += len;
+			}
+			_ptr[numVals] = totalLen;
+		}
+	}
+	
+	@Override
+	public long estimateInMemorySize() {
+		long size = super.estimateInMemorySize();
+		
+		// adding the size of values
+		size += 8; //array reference
+		if (_values != null) {
+			size += 32 + _values.length * 8; //values
+		}
+		
+		// adding bitmaps size
+		size += 16; //array references
+		if (_data != null) {
+			size += 32 + _ptr.length * 4; // offsets
+			size += 32 + _data.length * 2;    // bitmaps
+		}
+	
+		return size;
+	}
+
+	//generic decompression for OLE/RLE, to be overwritten for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		int[] colIndices = getColIndices();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				for (int colIx = 0; colIx < numCols; colIx++) {
+					target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
+				}
+			}
+		}
+	}
+
+	//generic decompression for OLE/RLE, to be overwritten for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				for (int colIx = 0; colIx < numCols; colIx++) {
+					int origMatrixColIx = getColIndex(colIx);
+					int targetColIx = colIndexTargets[origMatrixColIx];
+					target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
+				}
+			}
+		}
+	}
+	
+	//generic decompression for OLE/RLE, to be overwritten for performance
+	@Override
+	public void decompressToBlock(MatrixBlock target, int colpos) 
+	{
+		final int numCols = getNumCols();
+		final int numVals = getNumValues();
+		
+		// Run through the bitmaps for this column group
+		for (int i = 0; i < numVals; i++) {
+			Iterator<Integer> decoder = getDecodeIterator(i);
+			int valOff = i*numCols;
+
+			while (decoder.hasNext()) {
+				int row = decoder.next();
+				target.quickSetValue(row, 0, _values[valOff+colpos]);
+			}
+		}
+	}
+	
+	/**
+	 * 
+	 * @param bitmapIx
+	 * @return
+	 */
+	protected double sumValues(int bitmapIx)
+	{
+		final int numCols = getNumCols();
+		final int valOff = bitmapIx * numCols;
+		
+		double val = 0.0;
+		for( int i = 0; i < numCols; i++ ) {
+			val += _values[valOff+i];
+		}
+		
+		return val;
+	}
+	
+	protected double sumValues(int bitmapIx, double[] b)
+	{
+		final int numCols = getNumCols();
+		final int valOff = bitmapIx * numCols;
+		
+		double val = 0;
+		for( int i = 0; i < numCols; i++ ) {
+			val += _values[valOff+i] * b[i];
+		}
+		
+		return val;
+	}
+	
+	/**
+	 * 
+	 * @param b
+	 * @param c
+	 */
+	protected void sumAllValues(double[] b, double[] c)
+	{
+		final int numVals = getNumValues();
+		final int numCols = getNumCols();
+		
+		//vectMultiplyAdd over cols instead of dotProduct over vals because
+		//usually more values than columns
+		for( int i=0, off=0; i<numCols; i++, off+=numVals )
+			LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
+	}
+
+	/**
+	 * Method for use by subclasses. Applies a scalar operation to the value
+	 * metadata stored in the superclass.
+	 * 
+	 * @param op
+	 *            scalar operation to perform
+	 * @return transformed copy of value metadata for this column group
+	 * @throws DMLRuntimeException
+	 */
+	protected double[] applyScalarOp(ScalarOperator op)
+			throws DMLRuntimeException 
+	{
+		//scan over linearized values
+		double[] ret = new double[_values.length];
+		for (int i = 0; i < _values.length; i++) {
+			ret[i] = op.executeScalar(_values[i]);
+		}
+
+		return ret;
+	}
+	
+	/**
+	 * 
+	 * @param op
+	 * @param newVal
+	 * @param numCols
+	 * @return
+	 * @throws DMLRuntimeException
+	 */
+	protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
+			throws DMLRuntimeException 
+	{
+		//scan over linearized values
+		double[] ret = new double[_values.length + numCols];
+		for( int i = 0; i < _values.length; i++ ) {
+			ret[i] = op.executeScalar(_values[i]);
+		}
+		
+		//add new value to the end
+		Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
+		
+		return ret;
+	}
+
+	/**
+	 * @return the number of distinct sets of values associated with the bitmaps
+	 *         in this column group
+	 */
+	public int getNumValues() {
+		return _values.length / _colIndexes.length;
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public double[] getValues() {
+		return _values;
+	}
+	
+	/**
+	 * 
+	 * @return
+	 */
+	public char[] getBitmaps() {
+		return _data;
+	}
+	
+	public int[] getBitmapOffsets() {
+		return _ptr;
+	}
+
+	/**
+	 * @param bmpIx
+	 *            index of a specific compressed bitmap (stored in subclass,
+	 *            index same as {@link #values})
+	 * @return an object for iterating over the row offsets in this bitmap. Only
+	 *         valid until the next call to this method. May be reused across
+	 *         calls.
+	 */
+	public abstract Iterator<Integer> getDecodeIterator(int bmpIx);
+
+	/**
+	 * Utility function of sparse-unsafe operations.
+	 * 
+	 * @param ind
+	 * @return
+	 * @throws DMLRuntimeException
+	 */
+	protected int[] computeOffsets(boolean[] ind)
+		throws DMLRuntimeException 
+	{
+		//determine number of offsets
+		int numOffsets = 0;
+		for( int i=0; i<ind.length; i++ )
+			numOffsets += ind[i] ? 1 : 0;
+		
+		//create offset lists
+		int[] ret = new int[numOffsets];
+		for( int i=0, pos=0; i<ind.length; i++ )
+			if( ind[i] )
+				ret[pos++] = i;
+		
+		return ret;
+	}
+
+	@Override
+	public void readFields(DataInput in) 
+		throws IOException 
+	{
+		_numRows = in.readInt();
+		int numCols = in.readInt();
+		int numVals = in.readInt();
+		
+		//read col indices
+		_colIndexes = new int[ numCols ];
+		for( int i=0; i<numCols; i++ )
+			_colIndexes[i] = in.readInt();
+		
+		//read distinct values
+		_values = new double[numVals*numCols];
+		for( int i=0; i<numVals*numCols; i++ )
+			_values[i] = in.readDouble();
+		
+		//read bitmaps
+		int totalLen = in.readInt();
+		_ptr = new int[numVals+1];
+		_data = new char[totalLen];		
+		for( int i=0, off=0; i<numVals; i++ ) {
+			int len = in.readInt();
+			_ptr[i] = off;
+			for( int j=0; j<len; j++ )
+				_data[off+j] = in.readChar();
+			off += len;
+		}
+		_ptr[numVals] = totalLen;
+	}
+	
+	@Override
+	public void write(DataOutput out) 
+		throws IOException 
+	{
+		int numCols = getNumCols();
+		int numVals = getNumValues();
+		out.writeInt(_numRows);
+		out.writeInt(numCols);
+		out.writeInt(numVals);
+		
+		//write col indices
+		for( int i=0; i<_colIndexes.length; i++ )
+			out.writeInt( _colIndexes[i] );
+		
+		//write distinct values
+		for( int i=0; i<_values.length; i++ )
+			out.writeDouble(_values[i]);
+
+		//write bitmaps (lens and data, offset later recreated)
+		int totalLen = 0;
+		for( int i=0; i<numVals; i++ )
+			totalLen += len(i);
+		out.writeInt(totalLen);	
+		for( int i=0; i<numVals; i++ ) {
+			int len = len(i);
+			int off = _ptr[i];
+			out.writeInt(len);
+			for( int j=0; j<len; j++ )
+				out.writeChar(_data[off+j]);
+		}
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = 12; //header
+		//col indices
+		ret += 4 * _colIndexes.length; 
+		//distinct values (groups of values)
+		ret += 8 * _values.length;
+		//actual bitmaps
+		ret += 4; //total length
+		for( int i=0; i<getNumValues(); i++ )
+			ret += 4 + 2 * len(i);
+		
+		return ret;
+	}
+}