You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/17 00:23:29 UTC

[4/6] incubator-systemml git commit: [SYSTEMML-810] New compressed matrix blocks and operations, tests

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
new file mode 100644
index 0000000..07d9757
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCoder.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.TreeMap;
+
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+
+public class PlanningCoCoder 
+{
+	//constants for weight computation
+	private final static float GROUPABILITY_THRESHOLD = 0.00064f;
+	private final static boolean USE_BIN_WEIGHT = false;
+	private final static float PARTITION_WEIGHT = 0.05F; //higher values lead to more grouping
+	private final static float PARTITION_SIZE = PARTITION_WEIGHT * GROUPABILITY_THRESHOLD;
+	private final static float BIN_WEIGHT_PARAM = -0.65f; //lower values lead to more grouping
+
+	/**
+	 * 
+	 * @param sizeEstimator
+	 * @param availCols
+	 * @param colsCardinalities
+	 * @param compressedSize
+	 * @param numRows
+	 * @param sparsity
+	 * @return
+	 */
+	public static List<int[]> findCocodesByPartitioning(CompressedSizeEstimator sizeEstimator, List<Integer> availCols, 
+			List<Integer> colsCardinalities,List<Long> compressedSize, int numRows, double sparsity) 
+	{
+		float numRowsWeight = numRows;
+		List<int[]> retGroups = new ArrayList<int[]>();
+		// filtering out non-groupable columns as singleton groups
+		int numCols = availCols.size();
+		List<Integer> groupabaleCols = new ArrayList<Integer>();
+		// weighted of each column is the ratio of its cardinality to the number
+		// of rows scaled by the matrix sparsity
+		List<Float> groupabaleColWeights = new ArrayList<Float>();
+		HashMap<Integer, GroupableColInfo> groupableColsInfo = new HashMap<Integer, GroupableColInfo>();
+		for (int i = 0; i < numCols; i++) {
+			int colIx = availCols.get(i);
+			int cardinality = colsCardinalities.get(i);
+			float weight = ((float) cardinality) / numRowsWeight;
+			if (weight <= GROUPABILITY_THRESHOLD) {
+				groupabaleCols.add(colIx);
+				groupabaleColWeights.add(weight);
+				groupableColsInfo.put(colIx, new GroupableColInfo(weight,
+						compressedSize.get(i)));
+			} else {
+				retGroups.add(new int[] { colIx });
+			}
+		}
+		// bin packing based on PARTITION_WEIGHT and column weights
+		float weight = computeWeightForCoCoding(numRows, sparsity);
+		TreeMap<Float, List<List<Integer>>> bins = new PlanningBinPacker(
+				weight, groupabaleCols, groupabaleColWeights) 
+				.packFirstFit();
+
+		// brute force grouping within each partition
+		for (List<List<Integer>> binList : bins.values()) {
+			for (List<Integer> bin : binList) {
+				// building an array of singleton CoCodingGroup
+				PlanningCoCodingGroup[] singltonGroups = new PlanningCoCodingGroup[bin.size()];
+				int i = 0;
+				GroupableColInfo colInfo;
+				for (Integer col : bin) {
+					colInfo = groupableColsInfo.get(col);
+					singltonGroups[i++] = new PlanningCoCodingGroup(col, colInfo.size,
+							colInfo.cardRatio);
+				}
+				PlanningCoCodingGroup[] outputGroups = findCocodesBruteForce(
+						sizeEstimator, numRowsWeight, singltonGroups);
+				
+				for (PlanningCoCodingGroup grp : outputGroups) {
+					retGroups.add(grp.getColIndices());
+				}
+			}
+		}
+		return retGroups;
+	}
+
+	/**
+	 * Identify columns to code together. Uses a greedy approach that merges
+	 * pairs of column groups into larger groups. Each phase of the greedy
+	 * algorithm considers all combinations of pairs to merge.
+	 * 
+	 */
+	private static PlanningCoCodingGroup[] findCocodesBruteForce(
+			CompressedSizeEstimator sizeEstimator, float numRowsWeight,
+			PlanningCoCodingGroup[] singltonGroups) 
+	{
+		// Populate a priority queue with all available 2-column cocodings.
+		PriorityQueue<PlanningGroupMergeAction> q = new PriorityQueue<PlanningGroupMergeAction>();
+		for (int leftIx = 0; leftIx < singltonGroups.length; leftIx++) {
+			PlanningCoCodingGroup leftGrp = singltonGroups[leftIx];
+			for (int rightIx = leftIx + 1; rightIx < singltonGroups.length; rightIx++) {
+				PlanningCoCodingGroup rightGrp = singltonGroups[rightIx];
+				// at least one of the two groups should be low-cardinality
+				float cardRatio = leftGrp.getCardinalityRatio() + rightGrp.getCardinalityRatio(); 
+				if ( cardRatio < GROUPABILITY_THRESHOLD) {
+					PlanningGroupMergeAction potentialMerge = new PlanningGroupMergeAction(
+							sizeEstimator, numRowsWeight, leftGrp, rightGrp);
+					if (potentialMerge.getChangeInSize() < 0) {
+						q.add(potentialMerge);
+					}
+				}
+			}
+		}
+		PlanningCoCodingGroup[] colGroups = singltonGroups;
+		
+		// Greedily merge groups until we can no longer reduce the number of
+		// runs by merging groups
+		while (q.size() > 0) {
+			PlanningGroupMergeAction merge = q.poll();
+
+			// The queue can contain merge actions involving column groups that
+			// have already been merged.
+			// Filter those actions out.
+			int leftIx = findInArray(colGroups, merge.getLeftGrp());
+			int rightIx = findInArray(colGroups, merge.getRightGrp());
+			if (leftIx < 0 || rightIx < 0) {
+				// One or more of the groups to be merged has already been made
+				// part of another group.
+				// Drop the merge action.
+			} else {
+				PlanningCoCodingGroup mergedGrp = merge.getMergedGrp();
+
+				PlanningCoCodingGroup[] newColGroups = new PlanningCoCodingGroup[colGroups.length - 1];
+				int targetIx = 0;
+				for (int i = 0; i < colGroups.length; i++) {
+					if (i != leftIx && i != rightIx) {
+						newColGroups[targetIx] = colGroups[i];
+						targetIx++;
+					}
+				}
+
+				// New group goes at the end to (hopefully) speed up future
+				// linear search operations
+				newColGroups[newColGroups.length - 1] = mergedGrp;
+
+				// Consider merging the new group with all the other
+				// pre-existing groups.
+				for (int i = 0; i < newColGroups.length - 1; i++) {
+					PlanningCoCodingGroup newLeftGrp = newColGroups[i];
+					PlanningCoCodingGroup newRightGrp = mergedGrp;
+					if (newLeftGrp.getCardinalityRatio()
+							+ newRightGrp.getCardinalityRatio() < GROUPABILITY_THRESHOLD) {
+						PlanningGroupMergeAction newPotentialMerge = new PlanningGroupMergeAction(
+								sizeEstimator, numRowsWeight, newLeftGrp,
+								newRightGrp);
+						if (newPotentialMerge.getChangeInSize() < 0) {
+							q.add(newPotentialMerge);
+						}
+					}
+				}
+				colGroups = newColGroups;
+			}
+		}
+		return colGroups;
+	}
+
+	/**
+	 * 
+	 * @param numRows
+	 * @param sparsity
+	 * @return
+	 */
+	private static float computeWeightForCoCoding(int numRows, double sparsity)
+	{
+		if( USE_BIN_WEIGHT ) { //new method (non-conclusive)
+			//return (float) Math.pow(numRows*sparsity,BIN_WEIGHT_PARAM);
+			return (float) Math.pow(numRows,BIN_WEIGHT_PARAM);
+		}
+		else {
+			return PARTITION_SIZE;
+		}
+	}
+	
+	/**
+	 * 
+	 * @param arr
+	 * @param val
+	 * @return
+	 */
+	private static int findInArray(Object[] arr, Object val) {
+		for (int i = 0; i < arr.length; i++) {
+			if (arr[i].equals(val)) {
+				return i;
+			}
+		}
+		return -1;
+	}
+	
+	/**
+	 * 
+	 */
+	private static class GroupableColInfo {
+		float cardRatio;
+		long size;
+
+		public GroupableColInfo(float lcardRatio, long lsize) {
+			cardRatio = lcardRatio;
+			size = lsize;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
new file mode 100644
index 0000000..221e4ca
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
+
+/** 
+ * Class to represent information about co-coding a group of columns. 
+ * 
+ */
+public class PlanningCoCodingGroup 
+{
+	private int[] _colIndexes;
+	private long _estSize;
+	private float _cardRatio;
+
+	/**
+	 * Constructor for a one-column group; i.e. do not co-code a given column.
+	 * 
+	 */
+	public PlanningCoCodingGroup(int col, long estSize, float cardRatio) {
+		_colIndexes = new int[]{col};
+		_estSize = estSize;
+		_cardRatio = cardRatio;
+	}
+
+	/**
+	 * Constructor for merging two disjoint groups of columns
+	 * 
+	 * @param grp1   first group of columns to merge
+	 * @param grp2   second group to merge
+	 * @param numRowsWeight numRows x sparsity
+	 */
+	public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2,
+			CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight) 
+	{
+		// merge sorted non-empty arrays
+		_colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length];		
+		int grp1Ptr = 0, grp2Ptr = 0;
+		for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) {
+			if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) {
+				_colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++];
+				if (grp1Ptr == grp1._colIndexes.length) {
+					System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes,
+							mergedIx + 1, grp2._colIndexes.length - grp2Ptr);
+					break;
+				}
+			} else {
+				_colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++];
+				if (grp2Ptr == grp2._colIndexes.length) {
+					System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes,
+							mergedIx + 1, grp1._colIndexes.length - grp1Ptr);
+					break;
+				}
+			}
+		}
+		
+		// estimating size info
+		CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator
+				.estimateCompressedColGroupSize(_colIndexes);
+		_estSize = groupSizeInfo.getMinSize();
+		_cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight;
+	}
+
+	public int[] getColIndices() {
+		return _colIndexes;
+	}
+
+	/**
+	 * @return estimated compressed size of the grouped columns
+	 */
+	public long getEstSize() {
+		return _estSize;
+	}
+
+	public float getCardinalityRatio() {
+		return _cardRatio;
+	}
+
+	@Override
+	public String toString() {
+		return Arrays.toString(_colIndexes);
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
new file mode 100644
index 0000000..5e3c6c5
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+
+/**
+ * Internal data structure for tracking potential merges of column groups in
+ * co-coding calculations.
+ * 
+ */
+class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction> 
+{
+	private PlanningCoCodingGroup _leftGrp;   //left input
+	private PlanningCoCodingGroup _rightGrp;  //right input
+	private PlanningCoCodingGroup _mergedGrp; //output
+	private long _changeInSize;
+
+	
+	public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator,
+			float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) {
+		_leftGrp = leftGrp;
+		_rightGrp = rightGrp;
+		_mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight);
+
+		// Negative size change ==> Decrease in size
+		_changeInSize = _mergedGrp.getEstSize() 
+				- leftGrp.getEstSize() - rightGrp.getEstSize();
+	}
+
+	public int compareTo(PlanningGroupMergeAction o) {
+		// We only sort by the change in size
+		return (int) Math.signum(_changeInSize - o._changeInSize);
+	}
+
+	@Override
+	public String toString() {
+		return String.format("Merge %s and %s", _leftGrp, _rightGrp);
+	}
+
+	public PlanningCoCodingGroup getLeftGrp() {
+		return _leftGrp;
+	}
+
+	public PlanningCoCodingGroup getRightGrp() {
+		return _rightGrp;
+	}
+
+	public PlanningCoCodingGroup getMergedGrp() {
+		return _mergedGrp;
+	}
+
+	public long getChangeInSize() {
+		return _changeInSize;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
new file mode 100644
index 0000000..a37018f
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+
+/**
+ * Base class for all column selection readers.
+ * 
+ */
+public abstract class ReaderColumnSelection 
+{
+	protected int[] _colIndexes = null;
+	protected int _numRows = -1;
+	protected int _lastRow = -1;
+	protected boolean _skipZeros = false;
+	
+	protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros) {
+		_colIndexes = colIndexes;
+		_numRows = numRows;
+		_lastRow = -1;
+		_skipZeros = skipZeros;
+	}
+	
+	/**
+	 * Gets the next row, null when no more rows.
+	 * 
+	 * @return
+	 */
+	public abstract DblArray nextRow();
+
+	/**
+	 * 
+	 * @return
+	 */
+	public int getCurrentRowIndex() {
+		return _lastRow;
+	}
+	
+
+	/**
+	 * Resets the reader to the first row.
+	 */
+	public void reset() {
+		_lastRow = -1;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
new file mode 100644
index 0000000..d22f39d
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+public class ReaderColumnSelectionDense extends ReaderColumnSelection 
+{
+	protected MatrixBlock _data;
+	
+	// reusable return
+	private DblArray nonZeroReturn;
+	private DblArray reusableReturn;
+	private double[] reusableArr;
+
+	public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros) {
+		super(colIndices, CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+				data.getNumColumns() : data.getNumRows(), skipZeros);
+		_data = data;
+		reusableArr = new double[colIndices.length];
+		reusableReturn = new DblArray(reusableArr);
+	}
+
+	@Override
+	public DblArray nextRow() {
+		if( _skipZeros) {
+			while ((nonZeroReturn = getNextRow()) != null
+				&& DblArray.isZero(nonZeroReturn)); 
+			return nonZeroReturn;
+		} else {
+			return getNextRow();
+		}
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	private DblArray getNextRow() {
+		if(_lastRow == _numRows-1)
+			return null;
+		_lastRow++;
+		for (int i = 0; i < _colIndexes.length; i++) {
+			reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+					_data.quickGetValue( _colIndexes[i], _lastRow ) : 
+					_data.quickGetValue( _lastRow, _colIndexes[i] );
+		}
+		return reusableReturn;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
new file mode 100644
index 0000000..06518e4
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * 
+ * considers only a subset of row indexes
+ */
+public class ReaderColumnSelectionDenseSample extends ReaderColumnSelection 
+{
+	protected MatrixBlock _data;
+	
+	private int[] _sampleIndexes;
+	private int lastIndex = -1;
+
+	// reusable return
+	private DblArray nonZeroReturn;
+	private DblArray reusableReturn;
+	private double[] reusableArr;
+
+	public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes, boolean skipZeros) 
+	{
+		super(colIndexes, -1, skipZeros);
+		_data = data;
+		_sampleIndexes = sampleIndexes;
+		reusableArr = new double[colIndexes.length];
+		reusableReturn = new DblArray(reusableArr);
+	}
+
+	@Override
+	public DblArray nextRow() {
+		if (_skipZeros) {
+			while ((nonZeroReturn = getNextRow()) != null
+					&& DblArray.isZero(nonZeroReturn));
+			return nonZeroReturn;
+		} else {
+			return getNextRow();
+		}
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	private DblArray getNextRow() {
+		if (lastIndex == _sampleIndexes.length - 1)
+			return null;
+		lastIndex++;
+		for (int i = 0; i < _colIndexes.length; i++) {
+			reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+					_data.quickGetValue(_colIndexes[i], _sampleIndexes[lastIndex]) :
+					_data.quickGetValue(_sampleIndexes[lastIndex], _colIndexes[i]);
+		}
+		return reusableReturn;
+	}
+
+	@Override
+	public int getCurrentRowIndex() {
+		return _sampleIndexes[lastIndex];
+	}
+	
+	@Override
+	public void reset() {
+		lastIndex = -1;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
new file mode 100644
index 0000000..d2ef5a4
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseRow;
+
+/**
+ * Used to extract the values at certain indexes from each row in a sparse
+ * matrix
+ * 
+ * Keeps returning all-zeros arrays until reaching the last possible index. The
+ * current compression algorithm treats the zero-value in a sparse matrix like
+ * any other value.
+ */
+public class ReaderColumnSelectionSparse extends ReaderColumnSelection 
+{
+	private final DblArray ZERO_DBL_ARRAY;
+	private DblArray nonZeroReturn;
+
+	// reusable return
+	private DblArray reusableReturn;
+	private double[] reusableArr;
+
+	// current sparse row positions
+	private SparseRow[] sparseCols = null;
+	private int[] sparsePos = null;
+	
+	public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros) 
+	{
+		super(colIndexes, CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+				data.getNumColumns() : data.getNumRows(), skipZeros);
+		ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true);
+		reusableArr = new double[colIndexes.length];
+		reusableReturn = new DblArray(reusableArr);
+		
+		if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
+			throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
+		}
+		
+		sparseCols = new SparseRow[colIndexes.length];
+		sparsePos = new int[colIndexes.length];
+		if( data.getSparseBlock()!=null )
+		for( int i=0; i<colIndexes.length; i++ )
+			sparseCols[i] = data.getSparseBlock().get(colIndexes[i]);
+		Arrays.fill(sparsePos, 0);
+	}
+
+	@Override
+	public DblArray nextRow() {
+		if(_skipZeros) {
+			while ((nonZeroReturn = getNextRow()) != null
+				&& nonZeroReturn == ZERO_DBL_ARRAY);
+			return nonZeroReturn;
+		} else {
+			return getNextRow();
+		}
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	private DblArray getNextRow() 
+	{
+		if(_lastRow == _numRows-1)
+			return null;
+		_lastRow++;
+		
+		if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
+			throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
+		}
+		
+		//move pos to current row if necessary (for all columns)
+		for( int i=0; i<_colIndexes.length; i++ )
+			if( sparseCols[i] != null && (sparseCols[i].indexes().length<=sparsePos[i] 
+				|| sparseCols[i].indexes()[sparsePos[i]]<_lastRow) )
+			{
+				sparsePos[i]++;
+			}
+		
+		//extract current values
+		Arrays.fill(reusableArr, 0);
+		boolean zeroResult = true;
+		for( int i=0; i<_colIndexes.length; i++ ) 
+			if( sparseCols[i] != null && sparseCols[i].indexes().length>sparsePos[i]
+				&&sparseCols[i].indexes()[sparsePos[i]]==_lastRow )
+			{
+				reusableArr[i] = sparseCols[i].values()[sparsePos[i]];
+				zeroResult = false;
+			}
+
+		return zeroResult ? ZERO_DBL_ARRAY : reusableReturn;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
new file mode 100644
index 0000000..971f438
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
+
+/** 
+ * Uncompressed representation of one or more columns in bitmap format. 
+ * 
+ */
+public final class UncompressedBitmap 
+{
+	private int _numCols;
+
+	/** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/
+	private double[] _values;
+
+	/** Bitmaps (as lists of offsets) for each of the values. */
+	private int[][] _offsetsLists;
+
+	public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns ) 
+	{
+		// added for one pass bitmap construction
+		// Convert inputs to arrays
+		int numVals = distinctVals.size();
+		_values = new double[numVals*numColumns];
+		_offsetsLists = new int[numVals][];
+		int bitmapIx = 0;
+		for( DArrayIListEntry val : distinctVals.extractValues()) {
+			System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns);
+			_offsetsLists[bitmapIx++] = val.value.extractValues();
+		}
+		_numCols = numColumns;
+	}
+
+	public UncompressedBitmap( DoubleIntListHashMap distinctVals ) 
+	{
+		// added for one pass bitmap construction
+		// Convert inputs to arrays
+		int numVals = distinctVals.size();
+		_values = new double[numVals];
+		_offsetsLists = new int[numVals][];
+		int bitmapIx = 0;
+		for(DIListEntry val : distinctVals.extractValues()) {
+			_values[bitmapIx] = val.key;
+			_offsetsLists[bitmapIx++] = val.value.extractValues();
+		}
+		_numCols = 1;
+	}
+	
+	public int getNumColumns() {
+		return _numCols;
+	}
+
+	/**
+	 * @param ix   index of a particular distinct value
+	 * @return the tuple of column values associated with the specified index
+	 */
+	public double[] getValues(int ix) {
+		return Arrays.copyOfRange(_values, ix*_numCols, (ix+1)*_numCols);
+	}
+
+	/**
+	 * @return number of distinct values in the column; this number is also the
+	 *         number of bitmaps, since there is one bitmap per value
+	 */
+	public int getNumValues() {
+		return _values.length / _numCols;
+	}
+
+	/**
+	 * @param ix   index of a particular distinct value
+	 * @return IMMUTABLE array of the offsets of the rows containing the value
+	 *         with the indicated index
+	 */
+	public int[] getOffsetsList(int ix) {
+		return _offsetsLists[ix];
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
new file mode 100644
index 0000000..1a1ae55
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.UncompressedBitmap;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Base class for all compressed size estimators
+ */
+public abstract class CompressedSizeEstimator 
+{
+	protected MatrixBlock _data;
+
+	public CompressedSizeEstimator(MatrixBlock data) {
+		_data = data;
+	}
+
+	/**
+	 * 
+	 * @param colIndexes
+	 * @return
+	 */
+	public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes);
+
+	/**
+	 * 
+	 * @param ubm
+	 * @return
+	 */
+	public abstract CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm);
+
+	/**
+	 * 
+	 * @param ubm
+	 * @param inclRLE
+	 * @return
+	 */
+	protected SizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm, boolean inclRLE) {
+		int numVals = ubm.getNumValues();
+		int numRuns = 0;
+		int numOffs = 0;
+		int numSegs = 0;
+		int numSingle = 0;
+		
+		//compute size estimation factors
+		for (int i = 0; i < numVals; i++) {
+			int[] list = ubm.getOffsetsList(i);
+			numOffs += list.length;
+			numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1;
+			numSingle += (list.length==1) ? 1 : 0;
+			if( inclRLE ) {
+				int lastOff = -2;
+				for (int j = 0; j < list.length; j++) {
+					if (list[j] != lastOff + 1)
+						numRuns++;
+					lastOff = list[j];
+				}
+			}
+		}
+		
+		//construct estimation factors
+		return new SizeEstimationFactors(numVals, numSegs, numOffs, numRuns, numSingle);
+	}
+
+	/**
+	 * Estimates the number of bytes needed to encode this column group
+	 * in RLE encoding format.
+	 * 
+	 * @param numVals
+	 * @param numRuns
+	 * @param numCols
+	 * @return
+	 */
+	protected static long getRLESize(int numVals, int numRuns, int numCols) {
+		int ret = 0;
+		//distinct value tuples [double per col]
+		ret += 8 * numVals * numCols;
+		//offset/len fields per distinct value tuple [2xint]
+		ret += 8 * numVals;
+		//run data [2xchar]
+		ret += 4 * numRuns;
+		return ret;
+	}
+
+	/**
+	 * Estimates the number of bytes needed to encode this column group 
+	 * in OLE format.
+	 * 
+	 * @param numVals
+	 * @param numOffs
+	 * @param numSeqs
+	 * @param numCols
+	 * @return
+	 */
+	protected static long getOLESize(int numVals, float numOffs, int numSeqs, int numCols) {
+		int ret = 0;
+		//distinct value tuples [double per col]
+		ret += 8 * numVals * numCols;
+		//offset/len fields per distinct value tuple [2xint]
+		ret += 8 * numVals;
+		//offset list data [1xchar]
+		ret += 2 * numOffs;
+		//offset list seqment headers [1xchar]
+		ret += 2 * numSeqs;
+		return ret;
+	}
+	
+	/**
+	 * 
+	 */
+	protected static class SizeEstimationFactors {
+ 		protected int numVals;   //num value tuples
+ 		protected int numSegs;   //num OLE segments 
+ 		protected int numOffs;   //num OLE offsets
+ 		protected int numRuns;   //num RLE runs
+ 		protected int numSingle; //num singletons
+		
+		protected SizeEstimationFactors(int numvals, int numsegs, int numoffs, int numruns, int numsingle) {
+			numVals = numvals;
+			numSegs = numsegs;
+			numOffs = numoffs;
+			numRuns = numruns;
+			numSingle = numsingle;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
new file mode 100644
index 0000000..557c518
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.UncompressedBitmap;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Exact compressed size estimator (examines entire dataset).
+ * 
+ */
+public class CompressedSizeEstimatorExact extends CompressedSizeEstimator 
+{
+	public CompressedSizeEstimatorExact(MatrixBlock data) {
+		super(data);
+	}
+
+	@Override
+	public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) {
+		return estimateCompressedColGroupSize(
+			BitmapEncoder.extractBitmap(colIndexes, _data));
+	}
+
+	@Override
+	public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm) 
+	{
+		//compute size estimation factors
+		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
+		
+		//construct new size info summary
+		return new CompressedSizeInfo(fact.numVals,
+				getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
+				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
new file mode 100644
index 0000000..76a0f06
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorSample.java
@@ -0,0 +1,767 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.commons.math3.distribution.ChiSquaredDistribution;
+import org.apache.commons.math3.random.RandomDataGenerator;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.ReaderColumnSelection;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysml.runtime.compress.ReaderColumnSelectionDense;
+import org.apache.sysml.runtime.compress.ReaderColumnSelectionDenseSample;
+import org.apache.sysml.runtime.compress.ReaderColumnSelectionSparse;
+import org.apache.sysml.runtime.compress.UncompressedBitmap;
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+public class CompressedSizeEstimatorSample extends CompressedSizeEstimator 
+{
+	private static final boolean CORRECT_NONZERO_ESTIMATE = false; //TODO enable for production
+	private final static double SHLOSSER_JACKKNIFE_ALPHA = 0.975;
+	public static final float HAAS_AND_STOKES_ALPHA1 = 0.9F; //0.9 recommended in paper
+	public static final float HAAS_AND_STOKES_ALPHA2 = 30F; //30 recommended in paper
+	public static final float HAAS_AND_STOKES_UJ2A_C = 50; //50 recommend in paper
+
+	private int[] _sampleRows = null;
+	private RandomDataGenerator _rng = null;
+	private int _numRows = -1;
+	
+	/**
+	 * 
+	 * @param data
+	 * @param sampleRows
+	 */
+	public CompressedSizeEstimatorSample(MatrixBlock data, int[] sampleRows) {
+		super(data);
+		_sampleRows = sampleRows;
+		_rng = new RandomDataGenerator();
+		_numRows = CompressedMatrixBlock.TRANSPOSE_INPUT ? 
+				_data.getNumColumns() : _data.getNumRows();
+	}
+
+	/**
+	 * 
+	 * @param mb
+	 * @param sampleSize
+	 */
+	public CompressedSizeEstimatorSample(MatrixBlock mb, int sampleSize) {
+		this(mb, null);
+		_sampleRows = getSortedUniformSample(_numRows, sampleSize);
+	}
+
+	/**
+	 * 
+	 * @param sampleRows, assumed to be sorted
+	 */
+	public void setSampleRows(int[] sampleRows) {
+		_sampleRows = sampleRows;
+	}
+
+	/**
+	 * 
+	 * @param sampleSize
+	 */
+	public void resampleRows(int sampleSize) {
+		_sampleRows = getSortedUniformSample(_numRows, sampleSize);
+	}
+
+	@Override
+	public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) 
+	{
+		//extract statistics from sample
+		UncompressedBitmap ubm = BitmapEncoder.extractBitmapFromSample(
+				colIndexes, _data, _sampleRows);
+		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, false);
+
+		//estimate number of distinct values 
+		int totalCardinality = getNumDistinctValues(colIndexes);
+		totalCardinality = Math.max(totalCardinality, fact.numVals); //fix anomalies w/ large sample fraction
+		totalCardinality = Math.min(totalCardinality, _numRows); //fix anomalies w/ large sample fraction
+		
+		//estimate unseen values
+		// each unseen is assumed to occur only once (it did not show up in the sample because it is rare)
+		int unseen = Math.max(0, totalCardinality - fact.numVals);
+		int sampleSize = _sampleRows.length;
+		
+		//estimate number of offsets
+		double sparsity = OptimizerUtils.getSparsity(
+				_data.getNumRows(), _data.getNumColumns(), _data.getNonZeros());
+		
+		// expected value given that we don't store the zero values
+		float totalNumOffs = (float) (_numRows * (1 - Math.pow(1 - sparsity,colIndexes.length)));		
+		if( CORRECT_NONZERO_ESTIMATE ) {
+			long numZeros = sampleSize - fact.numOffs;
+			float C = Math.max(1-(float)fact.numSingle/sampleSize, (float)sampleSize/_numRows); 
+			totalNumOffs = _numRows - ((numZeros>0)? (float)_numRows/sampleSize*C*numZeros : 0);
+		}
+		
+		// For a single offset, the number of blocks depends on the value of
+		// that offset. small offsets (first group of rows in the matrix)
+		// require a small number of blocks and large offsets (last group of
+		// rows) require a large number of blocks. The unseen offsets are
+		// distributed over the entire offset range. A reasonable and fast
+		// estimate for the number of blocks is to use the arithmetic mean of
+		// the number of blocks used for the first index (=1) and that of the
+		// last index.
+		int numUnseenSeg = Math.round(unseen
+				* (2.0f * BitmapEncoder.BITMAP_BLOCK_SZ + _numRows) / 2
+				/ BitmapEncoder.BITMAP_BLOCK_SZ);
+		int totalNumSeg = fact.numSegs + numUnseenSeg;
+		int totalNumRuns = getNumRuns(ubm, sampleSize, _numRows) + unseen;
+
+		//construct new size info summary
+		return new CompressedSizeInfo(totalCardinality,
+				getRLESize(totalCardinality, totalNumRuns, colIndexes.length),
+				getOLESize(totalCardinality, totalNumOffs, totalNumSeg, colIndexes.length));
+	}
+
+	@Override
+	public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm) 
+	{
+		//compute size estimation factors
+		SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
+		
+		//construct new size info summary
+		return new CompressedSizeInfo(fact.numVals,
+				getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
+				getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
+	}
+	
+	/**
+	 * 
+	 * @param colIndexes
+	 * @return
+	 */
+	private int getNumDistinctValues(int[] colIndexes) {
+		return haasAndStokes(colIndexes);
+	}
+
+	/**
+	 * 
+	 * @param sampleUncompressedBitmap
+	 * @param sampleSize
+	 * @param totalNumRows
+	 * @return
+	 */
+	private int getNumRuns(UncompressedBitmap sampleUncompressedBitmap,
+			int sampleSize, int totalNumRows) {
+		int numVals = sampleUncompressedBitmap.getNumValues();
+		// all values in the sample are zeros
+		if (numVals == 0)
+			return 0;
+		float numRuns = 0;
+		for (int vi = 0; vi < numVals; vi++) {
+			int[] offsets = sampleUncompressedBitmap.getOffsetsList(vi);
+			float offsetsRatio = ((float) offsets.length) / sampleSize;
+			float avgAdditionalOffsets = offsetsRatio * totalNumRows
+					/ sampleSize;
+			if (avgAdditionalOffsets < 1) {
+				// Ising-Stevens does not hold
+				// fall-back to using the expected number of offsets as an upper
+				// bound on the number of runs
+				numRuns += ((float) offsets.length) * totalNumRows / sampleSize;
+				continue;
+			}
+			int intervalEnd, intervalSize;
+			float additionalOffsets;
+			// probability of an index being non-offset in current and previous
+			// interval respectively
+			float nonOffsetProb, prevNonOffsetProb = 1;
+			boolean reachedSampleEnd = false;
+			// handling the first interval separately for simplicity
+			int intervalStart = -1;
+			if (_sampleRows[0] == 0) {
+				// empty interval
+				intervalStart = 0;
+			} else {
+				intervalEnd = _sampleRows[0];
+				intervalSize = intervalEnd - intervalStart - 1;
+				// expected value of a multivariate hypergeometric distribution
+				additionalOffsets = offsetsRatio * intervalSize;
+				// expected value of an Ising-Stevens distribution
+				numRuns += (intervalSize - additionalOffsets)
+						* additionalOffsets / intervalSize;
+				intervalStart = intervalEnd;
+				prevNonOffsetProb = (intervalSize - additionalOffsets)
+						/ intervalSize;
+			}
+			// for handling separators
+
+			int withinSepRun = 0;
+			boolean seenNonOffset = false, startedWithOffset = false, endedWithOffset = false;
+			int offsetsPtrs = 0;
+			for (int ix = 1; ix < sampleSize; ix++) {
+				// start of a new separator
+				// intervalStart will always be pointing at the current value
+				// in the separator block
+
+				if (offsetsPtrs < offsets.length
+						&& offsets[offsetsPtrs] == intervalStart) {
+					startedWithOffset = true;
+					offsetsPtrs++;
+					endedWithOffset = true;
+				} else {
+					seenNonOffset = true;
+					endedWithOffset = false;
+				}
+				while (intervalStart + 1 == _sampleRows[ix]) {
+					intervalStart = _sampleRows[ix];
+					if (seenNonOffset) {
+						if (offsetsPtrs < offsets.length
+								&& offsets[offsetsPtrs] == intervalStart) {
+							withinSepRun = 1;
+							offsetsPtrs++;
+							endedWithOffset = true;
+						} else {
+							numRuns += withinSepRun;
+							withinSepRun = 0;
+							endedWithOffset = false;
+						}
+					} else if (offsetsPtrs < offsets.length
+							&& offsets[offsetsPtrs] == intervalStart) {
+						offsetsPtrs++;
+						endedWithOffset = true;
+					} else {
+						seenNonOffset = true;
+						endedWithOffset = false;
+					}
+					//
+					ix++;
+					if (ix == sampleSize) {
+						// end of sample which searching for a start
+						reachedSampleEnd = true;
+						break;
+					}
+				}
+
+				// runs within an interval of unknowns
+				if (reachedSampleEnd)
+					break;
+				intervalEnd = _sampleRows[ix];
+				intervalSize = intervalEnd - intervalStart - 1;
+				// expected value of a multivariate hypergeometric distribution
+				additionalOffsets = offsetsRatio * intervalSize;
+				// expected value of an Ising-Stevens distribution
+				numRuns += (intervalSize - additionalOffsets)
+						* additionalOffsets / intervalSize;
+				nonOffsetProb = (intervalSize - additionalOffsets)
+						/ intervalSize;
+
+				// additional runs resulting from x's on the boundaries of the
+				// separators
+				// endedWithOffset = findInArray(offsets, intervalStart) != -1;
+				if (seenNonOffset) {
+					if (startedWithOffset) {
+						// add p(y in the previous interval)
+						numRuns += prevNonOffsetProb;
+					}
+					if (endedWithOffset) {
+						// add p(y in the current interval)
+						numRuns += nonOffsetProb;
+					}
+				} else {
+					// add p(y in the previous interval and y in the current
+					// interval)
+					numRuns += prevNonOffsetProb * nonOffsetProb;
+				}
+				prevNonOffsetProb = nonOffsetProb;
+				intervalStart = intervalEnd;
+				// reseting separator variables
+				seenNonOffset = startedWithOffset = endedWithOffset = false;
+				withinSepRun = 0;
+
+			}
+			// last possible interval
+			if (intervalStart != totalNumRows - 1) {
+				intervalEnd = totalNumRows;
+				intervalSize = intervalEnd - intervalStart - 1;
+				// expected value of a multivariate hypergeometric distribution
+				additionalOffsets = offsetsRatio * intervalSize;
+				// expected value of an Ising-Stevens distribution
+				numRuns += (intervalSize - additionalOffsets)
+						* additionalOffsets / intervalSize;
+				nonOffsetProb = (intervalSize - additionalOffsets)
+						/ intervalSize;
+			} else {
+				nonOffsetProb = 1;
+			}
+			// additional runs resulting from x's on the boundaries of the
+			// separators
+			endedWithOffset = intervalStart == offsets[offsets.length - 1];
+			if (seenNonOffset) {
+				if (startedWithOffset) {
+					numRuns += prevNonOffsetProb;
+				}
+				if (endedWithOffset) {
+					// add p(y in the current interval)
+					numRuns += nonOffsetProb;
+				}
+			} else {
+				if (endedWithOffset)
+					// add p(y in the previous interval and y in the current
+					// interval)
+					numRuns += prevNonOffsetProb * nonOffsetProb;
+			}
+		}
+		return Math.round(numRuns);
+	}
+
+	/**
+	 * 
+	 * @param colIndexes
+	 * @return
+	 */
+	private int haasAndStokes(int[] colIndexes) {
+		ReaderColumnSelection reader =  new ReaderColumnSelectionDenseSample(_data, 
+				colIndexes, _sampleRows, !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+		return haasAndStokes(_numRows, _sampleRows.length, reader);
+	}
+
+	/**
+	 * TODO remove, just for local debugging.
+	 * 
+	 * @param colIndexes
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	private int getExactNumDistinctValues(int[] colIndexes) {
+		HashSet<DblArray> distinctVals = new HashSet<DblArray>();
+		ReaderColumnSelection reader = (_data.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT) ? 
+				new ReaderColumnSelectionSparse(_data, colIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS) : 
+				new ReaderColumnSelectionDense(_data, colIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+		DblArray val = null;
+		while (null != (val = reader.nextRow()))
+			distinctVals.add(val);
+		return distinctVals.size();
+	}
+
+	/**
+	 * Returns a sorted array of n integers, drawn uniformly from the range [0,range).
+	 * 
+	 * @param range
+	 * @param smplSize
+	 * @return
+	 */
+	private int[] getSortedUniformSample(int range, int smplSize) {
+		if (smplSize == 0)
+			return new int[] {};
+		int[] sample = _rng.nextPermutation(range, smplSize);
+		Arrays.sort(sample);
+		return sample;
+	}
+	
+
+	/////////////////////////////////////////////////////
+	// Sample Cardinality Estimator library
+	/////////////////////////////////////////
+	
+	/**
+	 * M. Charikar, S. Chaudhuri, R. Motwani, and V. R. Narasayya, Towards
+	 * estimation error guarantees for distinct values, PODS'00.
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 *            : a reader for the sampled rows
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	private static int guaranteedErrorEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader) {
+		HashMap<DblArray, Integer> valsCount = getValCounts(sampleRowsReader);
+		// number of values that occur only once
+		int singltonValsCount = 0;
+		int otherValsCount = 0;
+		for (Integer c : valsCount.values()) {
+			if (c == 1)
+				singltonValsCount++;
+			else
+				otherValsCount++;
+		}
+		return (int) Math.round(otherValsCount + singltonValsCount
+				* Math.sqrt(((double) nRows) / sampleSize));
+	}
+
+	/**
+	 * Peter J. Haas, Jeffrey F. Naughton, S. Seshadri, and Lynne Stokes. 
+	 * Sampling-Based Estimation of the Number of Distinct Values of an
+	 * Attribute. VLDB'95, Section 3.2.
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	private static int shlosserEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader) 
+	{
+		return shlosserEstimator(nRows, sampleSize, sampleRowsReader,
+				getValCounts(sampleRowsReader));
+	}
+
+	/**
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @param valsCount
+	 * @return
+	 */
+	private static int shlosserEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader,
+			HashMap<DblArray, Integer> valsCount) 
+	{
+		double q = ((double) sampleSize) / nRows;
+		double oneMinusQ = 1 - q;
+
+		int[] freqCounts = getFreqCounts(valsCount);
+
+		double numerSum = 0, denomSum = 0;
+		int iPlusOne = 1;
+		for (int i = 0; i < freqCounts.length; i++, iPlusOne++) {
+			numerSum += Math.pow(oneMinusQ, iPlusOne) * freqCounts[i];
+			denomSum += iPlusOne * q * Math.pow(oneMinusQ, i) * freqCounts[i];
+		}
+		int estimate = (int) Math.round(valsCount.size() + freqCounts[0]
+				* numerSum / denomSum);
+		return estimate < 1 ? 1 : estimate;
+	}
+
+	/**
+	 * Peter J. Haas, Jeffrey F. Naughton, S. Seshadri, and Lynne Stokes.
+	 * Sampling-Based Estimation of the Number of Distinct Values of an
+	 * Attribute. VLDB'95, Section 4.3.
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	private static int smoothedJackknifeEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader) 
+	{
+		return smoothedJackknifeEstimator(nRows, sampleSize, sampleRowsReader,
+				getValCounts(sampleRowsReader));
+	}
+
+	/**
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @param valsCount
+	 * @return
+	 */
+	private static int smoothedJackknifeEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader,
+			HashMap<DblArray, Integer> valsCount) 
+	{
+		int[] freqCounts = getFreqCounts(valsCount);
+		// all values in the sample are zeros
+		if (freqCounts.length == 0)
+			return 0;
+		// nRows is N and sampleSize is n
+
+		int d = valsCount.size();
+		double f1 = freqCounts[0];
+		int Nn = nRows * sampleSize;
+		double D0 = (d - f1 / sampleSize)
+				/ (1 - (nRows - sampleSize + 1) * f1 / Nn);
+		double NTilde = nRows / D0;
+		/*-
+		 *
+		 * h (as defined in eq. 5 in the paper) can be implemented as:
+		 * 
+		 * double h = Gamma(nRows - NTilde + 1) x Gamma.gamma(nRows -sampleSize + 1) 
+		 * 		     ----------------------------------------------------------------
+		 *  		Gamma.gamma(nRows - sampleSize - NTilde + 1) x Gamma.gamma(nRows + 1)
+		 * 
+		 * 
+		 * However, for large values of nRows, Gamma.gamma returns NAN
+		 * (factorial of a very large number).
+		 * 
+		 * The following implementation solves this problem by levaraging the
+		 * cancelations that show up when expanding the factorials in the
+		 * numerator and the denominator.
+		 * 
+		 * 
+		 * 		min(A,D-1) x [min(A,D-1) -1] x .... x B
+		 * h = -------------------------------------------
+		 * 		C x [C-1] x .... x max(A+1,D)
+		 * 
+		 * where A = N-\tilde{N}
+		 *       B = N-\tilde{N} - n + a
+		 *       C = N
+		 *       D = N-n+1
+		 *       
+		 * 		
+		 *
+		 */
+		double A = (int) nRows - NTilde;
+		double B = A - sampleSize + 1;
+		double C = nRows;
+		double D = nRows - sampleSize + 1;
+		A = Math.min(A, D - 1);
+		D = Math.max(A + 1, D);
+		double h = 1;
+
+		for (; A >= B || C >= D; A--, C--) {
+			if (A >= B)
+				h *= A;
+			if (C >= D)
+				h /= C;
+		}
+		// end of h computation
+
+		double g = 0, gamma = 0;
+		// k here corresponds to k+1 in the paper (the +1 comes from replacing n
+		// with n-1)
+		for (int k = 2; k <= sampleSize + 1; k++) {
+			g += 1.0 / (nRows - NTilde - sampleSize + k);
+		}
+		for (int i = 1; i <= freqCounts.length; i++) {
+			gamma += i * (i - 1) * freqCounts[i - 1];
+		}
+		gamma *= (nRows - 1) * D0 / Nn / (sampleSize - 1);
+		gamma += D0 / nRows - 1;
+
+		double estimate = (d + nRows * h * g * gamma)
+				/ (1 - (nRows - NTilde - sampleSize + 1) * f1 / Nn);
+		return estimate < 1 ? 1 : (int) Math.round(estimate);
+	}
+
+	/**
+	 * Peter J. Haas, Jeffrey F. Naughton, S. Seshadri, and Lynne Stokes. 1995.
+	 * Sampling-Based Estimation of the Number of Distinct Values of an
+	 * Attribute. VLDB'95, Section 5.2, recommended estimator by the authors
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	private static int shlosserJackknifeEstimator(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader) {
+		HashMap<DblArray, Integer> valsCount = getValCounts(sampleRowsReader);
+
+		// uniformity chi-square test
+		double nBar = ((double) sampleSize) / valsCount.size();
+		// test-statistic
+		double u = 0;
+		for (int cnt : valsCount.values()) {
+			u += Math.pow(cnt - nBar, 2);
+		}
+		u /= nBar;
+		if (sampleSize != usedSampleSize)
+			computeCriticalValue(sampleSize);
+		if (u < uniformityCriticalValue) {
+			// uniform
+			return smoothedJackknifeEstimator(nRows, sampleSize,
+					sampleRowsReader, valsCount);
+		} else {
+			return shlosserEstimator(nRows, sampleSize, sampleRowsReader,
+					valsCount);
+		}
+	}
+
+	/*
+	 * In the shlosserSmoothedJackknifeEstimator as long as the sample size did
+	 * not change, we will have the same critical value each time the estimator
+	 * is used (given that alpha is the same). We cache the critical value to
+	 * avoid recomputing it in each call.
+	 */
+	private static double uniformityCriticalValue;
+	private static int usedSampleSize;
+	
+	private static void computeCriticalValue(int sampleSize) {
+		ChiSquaredDistribution chiSqr = new ChiSquaredDistribution(sampleSize - 1);
+		uniformityCriticalValue = chiSqr.inverseCumulativeProbability(SHLOSSER_JACKKNIFE_ALPHA);
+		usedSampleSize = sampleSize;
+	}
+
+	/**
+	 * Haas, Peter J., and Lynne Stokes.
+	 * "Estimating the number of classes in a finite population." Journal of the
+	 * American Statistical Association 93.444 (1998): 1475-1487.
+	 * 
+	 * The hybrid estimator given by Eq. 33 in Section 6
+	 * 
+	 * @param nRows
+	 * @param sampleSize
+	 * @param sampleRowsReader
+	 * @return
+	 */
+	private static int haasAndStokes(int nRows, int sampleSize,
+			ReaderColumnSelection sampleRowsReader) 
+	{
+		HashMap<DblArray, Integer> valsCount = getValCounts(sampleRowsReader);
+		// all values in the sample are zeros.
+		if (valsCount.size() == 0)
+			return 1;
+		int[] freqCounts = getFreqCounts(valsCount);
+		float q = ((float) sampleSize) / nRows;
+		float _1MinusQ = 1 - q;
+		// Eq. 11
+		float duj1Fraction = ((float) sampleSize)
+				/ (sampleSize - _1MinusQ * freqCounts[0]);
+		float duj1 = duj1Fraction * valsCount.size();
+		// Eq. 16
+		float gamma = 0;
+		for (int i = 1; i <= freqCounts.length; i++) {
+			gamma += i * (i - 1) * freqCounts[i - 1];
+		}
+		gamma *= duj1 / sampleSize / sampleSize;
+		gamma += duj1 / nRows - 1;
+		gamma = Math.max(gamma, 0);
+		int estimate;
+		
+		if (gamma < HAAS_AND_STOKES_ALPHA1) {
+			// UJ2 - begining of page 1479
+		//	System.out.println("uj2");
+			estimate = (int) (duj1Fraction * (valsCount.size() - freqCounts[0]
+					* _1MinusQ * Math.log(_1MinusQ) * gamma / q));
+		} else if (gamma < HAAS_AND_STOKES_ALPHA2) {
+			// UJ2a - end of page 1998
+			//System.out.println("uj2a");
+			int numRemovedClasses = 0;
+			float updatedNumRows = nRows;
+			int updatedSampleSize = sampleSize;
+
+			for (Integer cnt : valsCount.values()) {
+				if (cnt > HAAS_AND_STOKES_UJ2A_C) {
+					numRemovedClasses++;
+					freqCounts[cnt - 1]--;
+					updatedSampleSize -= cnt;
+					/*
+					 * To avoid solving Eq. 20 numerically for the class size in
+					 * the full population (N_j), the current implementation
+					 * just scales cnt (n_j) by the sampling ratio (q).
+					 * Intuitively, the scaling should be fine since cnt is
+					 * large enough. Also, N_j in Eq. 20 is lower-bounded by cnt
+					 * which is already large enough to make the denominator in
+					 * Eq. 20 very close to 1.
+					 */
+					updatedNumRows -= ((float) cnt) / q;
+				}
+			}
+			if (updatedSampleSize == 0) {
+				// use uJ2a
+				
+				estimate = (int) (duj1Fraction * (valsCount.size() - freqCounts[0]
+						* (_1MinusQ) * Math.log(_1MinusQ) * gamma / q));
+			} else {
+				float updatedQ = ((float) updatedSampleSize) / updatedNumRows;
+				int updatedSampleCardinality = valsCount.size()
+						- numRemovedClasses;
+				float updatedDuj1Fraction = ((float) updatedSampleSize)
+						/ (updatedSampleSize - (1 - updatedQ) * freqCounts[0]);
+				float updatedDuj1 = updatedDuj1Fraction
+						* updatedSampleCardinality;
+				float updatedGamma = 0;
+				for (int i = 1; i <= freqCounts.length; i++) {
+					updatedGamma += i * (i - 1) * freqCounts[i - 1];
+				}
+				updatedGamma *= updatedDuj1 / updatedSampleSize
+						/ updatedSampleSize;
+				updatedGamma += updatedDuj1 / updatedNumRows - 1;
+				updatedGamma = Math.max(updatedGamma, 0);
+
+				estimate = (int) (updatedDuj1Fraction * (updatedSampleCardinality - freqCounts[0]
+						* (1 - updatedQ)
+						* Math.log(1 - updatedQ)
+						* updatedGamma / updatedQ))
+						+ numRemovedClasses;
+			}
+
+		} else {
+			// Sh3 - end of section 3
+			float fraq1Numer = 0;
+			float fraq1Denom = 0;
+			float fraq2Numer = 0;
+			float fraq2Denom = 0;
+			for (int i = 1; i <= freqCounts.length; i++) {
+				fraq1Numer += i * q * q * Math.pow(1 - q * q, i - 1)
+						* freqCounts[i - 1];
+				fraq1Denom += Math.pow(_1MinusQ, i) * (Math.pow(1 + q, i) - 1)
+						* freqCounts[i - 1];
+				fraq2Numer += Math.pow(_1MinusQ, i) * freqCounts[i - 1];
+				fraq2Denom += i * q * Math.pow(_1MinusQ, i - 1)
+						* freqCounts[i - 1];
+			}
+			estimate = (int) (valsCount.size() + freqCounts[0] * fraq1Numer
+					/ fraq1Denom * fraq2Numer * fraq2Numer / fraq2Denom
+					/ fraq2Denom);
+		}
+		return estimate < 1 ? 1 : estimate;
+	}
+
+	/**
+	 * 
+	 * @param sampleRowsReader
+	 * @return
+	 */
+	private static HashMap<DblArray, Integer> getValCounts(
+			ReaderColumnSelection sampleRowsReader) 
+	{
+		HashMap<DblArray, Integer> valsCount = new HashMap<DblArray, Integer>();
+		DblArray val = null;
+		Integer cnt;
+		while (null != (val = sampleRowsReader.nextRow())) {
+			cnt = valsCount.get(val);
+			if (cnt == null)
+				cnt = 0;
+			cnt++;
+			valsCount.put(val, cnt);
+		}
+		return valsCount;
+	}
+
+	/**
+	 * 
+	 * @param valsCount
+	 * @return
+	 */
+	private static int[] getFreqCounts(HashMap<DblArray, Integer> valsCount) 
+	{
+		int maxCount = 0;
+		for (Integer c : valsCount.values()) {
+			if (c > maxCount)
+				maxCount = c;
+		}
+		
+		/*
+		 * freqCounts[i-1] = how many values occured with a frequecy i
+		 */
+		int[] freqCounts = new int[maxCount];
+		for (Integer c : valsCount.values()) {
+			freqCounts[c - 1]++;
+		}
+		return freqCounts;
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
new file mode 100644
index 0000000..834483e
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeInfo.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+/**
+ * 
+ * A helper reusable object for maintaining bitmap sizes
+ */
+public class CompressedSizeInfo 
+{
+	private int _estCard = -1;
+	private long _rleSize = -1; 
+	private long _oleSize = -1;
+
+	public CompressedSizeInfo() {
+		
+	}
+
+	public CompressedSizeInfo(int estCard, long rleSize, long oleSize) {
+		_estCard = estCard;
+		_rleSize = rleSize;
+		_oleSize = oleSize;
+	}
+
+	public void setRLESize(long rleSize) {
+		_rleSize = rleSize;
+	}
+	
+	public long getRLESize() {
+		return _rleSize;
+	}
+	
+	public void setOLESize(long oleSize) {
+		_oleSize = oleSize;
+	}
+
+	public long getOLESize() {
+		return _oleSize;
+	}
+
+	public long getMinSize() {
+		return Math.min(_rleSize, _oleSize);
+	}
+
+	public void setEstCardinality(int estCard) {
+		_estCard = estCard;
+	}
+
+	public int getEstCarinality() {
+		return _estCard;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java b/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
new file mode 100644
index 0000000..f857b5b
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/SizeEstimatorFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+public class SizeEstimatorFactory 
+{
+	public static final float SAMPLING_RATIO = 0.01f; //conservative default
+	
+	/**
+	 * 
+	 * @param data
+	 * @param numRows
+	 * @return
+	 */
+	@SuppressWarnings("unused")
+	public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, int numRows) {
+		return (SAMPLING_RATIO == 1.0) ?
+				new CompressedSizeEstimatorExact(data):
+				new CompressedSizeEstimatorSample(data, (int) (numRows*SAMPLING_RATIO));
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java b/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
new file mode 100644
index 0000000..e87ac29
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/ConverterUtils.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.ColGroup;
+import org.apache.sysml.runtime.compress.ColGroupOLE;
+import org.apache.sysml.runtime.compress.ColGroupRLE;
+import org.apache.sysml.runtime.compress.ColGroupUncompressed;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.util.DataConverter;
+
+public class ConverterUtils 
+{
+	/**
+	 * Copy col group instance with deep copy of column indices but
+	 * shallow copy of actual contents;
+	 * 
+	 * @param group
+	 * @return
+	 */
+	public static ColGroup copyColGroup(ColGroup group)
+	{
+		ColGroup ret = null;
+		
+		//deep copy col indices
+		int[] colIndices = Arrays.copyOf(group.getColIndices(), group.getNumCols());
+		
+		//create copy of column group
+		if( group instanceof ColGroupUncompressed ) {
+			ColGroupUncompressed in = (ColGroupUncompressed)group;
+			ret = new ColGroupUncompressed(colIndices, in.getNumRows(), in.getData());
+		}
+		else if( group instanceof ColGroupRLE ) {
+			ColGroupRLE in = (ColGroupRLE)group;
+			ret = new ColGroupRLE(colIndices, in.getNumRows(), in.getValues(), 
+					in.getBitmaps(), in.getBitmapOffsets());
+		}
+		else if( group instanceof ColGroupOLE ) {
+			ColGroupOLE in = (ColGroupOLE) group;
+			ret = new ColGroupOLE(colIndices, in.getNumRows(), in.getValues(), 
+					in.getBitmaps(), in.getBitmapOffsets());
+		}
+		
+		return ret;
+	}
+	
+	/**
+	 * 
+	 * @param vector
+	 * @return
+	 */
+	public static double[] getDenseVector( MatrixBlock vector )
+	{
+		if( vector.isInSparseFormat() )
+			return DataConverter.convertToDoubleVector(vector);
+		else 
+			return vector.getDenseBlock();
+	}
+	
+	/**
+	 * 
+	 * @param group
+	 * @return
+	 */
+	public static MatrixBlock getUncompressedColBlock( ColGroup group )
+	{
+		MatrixBlock ret = null;
+		if( group instanceof ColGroupUncompressed ) {
+			ret = ((ColGroupUncompressed) group).getData();
+		}
+		else {
+			ArrayList<ColGroup> tmpGroup = new ArrayList<ColGroup>(Arrays.asList(group));
+			ColGroupUncompressed decompressedCols = new ColGroupUncompressed(tmpGroup);
+			ret = decompressedCols.getData();
+		}
+		
+		return ret;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/utils/DblArray.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/DblArray.java b/src/main/java/org/apache/sysml/runtime/compress/utils/DblArray.java
new file mode 100644
index 0000000..49c163b
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/DblArray.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import java.util.Arrays;
+
+/**
+ * Helper class used for bitmap extraction.
+ *
+ */
+public class DblArray 
+{
+	private double[] _arr = null;
+	private boolean _zero = false;
+	
+	public DblArray() {
+		this(null, false);
+	}
+	
+	public DblArray(double[] arr) {
+		this(arr, false);
+	}
+	
+	public DblArray(DblArray that) {
+		this(Arrays.copyOf(that._arr, that._arr.length), that._zero);
+	}
+
+	public DblArray(double[] arr, boolean allZeros) {
+		_arr = arr;
+		_zero = allZeros;
+	}
+	
+	public double[] getData() {
+		return _arr;
+	}
+	
+	@Override
+	public int hashCode() {
+		return _zero ? 0 : Arrays.hashCode(_arr);
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		return ( o instanceof DblArray
+			&& _zero == ((DblArray) o)._zero
+			&& Arrays.equals(_arr, ((DblArray) o)._arr) );
+	}
+
+	@Override
+	public String toString() {
+		return Arrays.toString(_arr);
+	}
+
+	/**
+	 * 
+	 * @param ds
+	 * @return
+	 */
+	public static boolean isZero(double[] ds) {
+		for (int i = 0; i < ds.length; i++)
+			if (ds[i] != 0.0)
+				return false;
+		return true;
+	}
+
+	/**
+	 * 
+	 * @param val
+	 * @return
+	 */
+	public static boolean isZero(DblArray val) {
+		return val._zero || isZero(val._arr);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/utils/DblArrayIntListHashMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/DblArrayIntListHashMap.java b/src/main/java/org/apache/sysml/runtime/compress/utils/DblArrayIntListHashMap.java
new file mode 100644
index 0000000..a5455ab
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/DblArrayIntListHashMap.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import java.util.ArrayList;
+
+/**
+ * This class provides a memory-efficient replacement for
+ * HashMap<DblArray,IntArrayList> for restricted use cases.
+ * 
+ */
+public class DblArrayIntListHashMap 
+{
+	private static final int INIT_CAPACITY = 8;
+	private static final int RESIZE_FACTOR = 2;
+	private static final float LOAD_FACTOR = 0.75f;
+
+	private DArrayIListEntry[] _data = null;
+	private int _size = -1;
+
+	public DblArrayIntListHashMap() {
+		_data = new DArrayIListEntry[INIT_CAPACITY];
+		_size = 0;
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public int size() {
+		return _size;
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @return
+	 */
+	public IntArrayList get(DblArray key) {
+		// probe for early abort
+		if( _size == 0 )
+			return null;
+
+		// compute entry index position
+		int hash = hash(key);
+		int ix = indexFor(hash, _data.length);
+
+		// find entry
+		for( DArrayIListEntry e = _data[ix]; e != null; e = e.next ) {
+			if( e.key.equals(key) ) {
+				return e.value;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @param value
+	 */
+	public void appendValue(DblArray key, IntArrayList value) {
+		// compute entry index position
+		int hash = hash(key);
+		int ix = indexFor(hash, _data.length);
+
+		// add new table entry (constant time)
+		DArrayIListEntry enew = new DArrayIListEntry(key, value);
+		enew.next = _data[ix]; // colliding entries / null
+		_data[ix] = enew;
+		_size++;
+
+		// resize if necessary
+		if( _size >= LOAD_FACTOR * _data.length )
+			resize();
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public ArrayList<DArrayIListEntry> extractValues() {
+		ArrayList<DArrayIListEntry> ret = new ArrayList<DArrayIListEntry>();
+		for( DArrayIListEntry e : _data ) {
+			if( e != null ) {
+				while( e.next != null ) {
+					ret.add(e);
+					e = e.next;
+				}
+				ret.add(e);
+			}
+		}
+
+		return ret;
+	}
+
+	/**
+     * 
+     */
+	private void resize() {
+		// check for integer overflow on resize
+		if( _data.length > Integer.MAX_VALUE / RESIZE_FACTOR )
+			return;
+
+		// resize data array and copy existing contents
+		DArrayIListEntry[] olddata = _data;
+		_data = new DArrayIListEntry[_data.length * RESIZE_FACTOR];
+		_size = 0;
+
+		// rehash all entries
+		for( DArrayIListEntry e : olddata ) {
+			if( e != null ) {
+				while( e.next != null ) {
+					appendValue(e.key, e.value);
+					e = e.next;
+				}
+				appendValue(e.key, e.value);
+			}
+		}
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @return
+	 */
+	private static int hash(DblArray key) {
+		int h = key.hashCode();
+
+		// This function ensures that hashCodes that differ only by
+		// constant multiples at each bit position have a bounded
+		// number of collisions (approximately 8 at default load factor).
+		h ^= (h >>> 20) ^ (h >>> 12);
+		return h ^ (h >>> 7) ^ (h >>> 4);
+	}
+
+	/**
+	 * 
+	 * @param h
+	 * @param length
+	 * @return
+	 */
+	private static int indexFor(int h, int length) {
+		return h & (length - 1);
+	}
+
+	/**
+	 *
+	 */
+	public class DArrayIListEntry {
+		public DblArray key;
+		public IntArrayList value;
+		public DArrayIListEntry next;
+
+		public DArrayIListEntry(DblArray ekey, IntArrayList evalue) {
+			key = ekey;
+			value = evalue;
+			next = null;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/utils/DoubleIntListHashMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/DoubleIntListHashMap.java b/src/main/java/org/apache/sysml/runtime/compress/utils/DoubleIntListHashMap.java
new file mode 100644
index 0000000..5607a3f
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/DoubleIntListHashMap.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import java.util.ArrayList;
+
+/**
+ * This class provides a memory-efficient replacement for
+ * HashMap<Double,IntArrayList> for restricted use cases.
+ * 
+ */
+public class DoubleIntListHashMap 
+{
+	private static final int INIT_CAPACITY = 8;
+	private static final int RESIZE_FACTOR = 2;
+	private static final float LOAD_FACTOR = 0.75f;
+
+	private DIListEntry[] _data = null;
+	private int _size = -1;
+
+	public DoubleIntListHashMap() {
+		_data = new DIListEntry[INIT_CAPACITY];
+		_size = 0;
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public int size() {
+		return _size;
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @return
+	 */
+	public IntArrayList get(double key) {
+		// probe for early abort
+		if( _size == 0 )
+			return null;
+
+		// compute entry index position
+		int hash = hash(key);
+		int ix = indexFor(hash, _data.length);
+
+		// find entry
+		for( DIListEntry e = _data[ix]; e != null; e = e.next ) {
+			if( e.key == key ) {
+				return e.value;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @param value
+	 */
+	public void appendValue(double key, IntArrayList value) {
+		// compute entry index position
+		int hash = hash(key);
+		int ix = indexFor(hash, _data.length);
+
+		// add new table entry (constant time)
+		DIListEntry enew = new DIListEntry(key, value);
+		enew.next = _data[ix]; // colliding entries / null
+		_data[ix] = enew;
+		_size++;
+
+		// resize if necessary
+		if( _size >= LOAD_FACTOR * _data.length )
+			resize();
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public ArrayList<DIListEntry> extractValues() {
+		ArrayList<DIListEntry> ret = new ArrayList<DIListEntry>();
+		for( DIListEntry e : _data ) {
+			if (e != null) {
+				while( e.next != null ) {
+					ret.add(e);
+					e = e.next;
+				}
+				ret.add(e);
+			}
+		}
+
+		return ret;
+	}
+
+	/**
+     * 
+     */
+	private void resize() {
+		// check for integer overflow on resize
+		if( _data.length > Integer.MAX_VALUE / RESIZE_FACTOR )
+			return;
+
+		// resize data array and copy existing contents
+		DIListEntry[] olddata = _data;
+		_data = new DIListEntry[_data.length * RESIZE_FACTOR];
+		_size = 0;
+
+		// rehash all entries
+		for( DIListEntry e : olddata ) {
+			if( e != null ) {
+				while( e.next != null ) {
+					appendValue(e.key, e.value);
+					e = e.next;
+				}
+				appendValue(e.key, e.value);
+			}
+		}
+	}
+
+	/**
+	 * 
+	 * @param key
+	 * @return
+	 */
+	private static int hash(double key) {
+		// basic double hash code (w/o object creation)
+		long bits = Double.doubleToRawLongBits(key);
+		int h = (int) (bits ^ (bits >>> 32));
+
+		// This function ensures that hashCodes that differ only by
+		// constant multiples at each bit position have a bounded
+		// number of collisions (approximately 8 at default load factor).
+		h ^= (h >>> 20) ^ (h >>> 12);
+		return h ^ (h >>> 7) ^ (h >>> 4);
+	}
+
+	/**
+	 * 
+	 * @param h
+	 * @param length
+	 * @return
+	 */
+	private static int indexFor(int h, int length) {
+		return h & (length - 1);
+	}
+
+	/**
+	 *
+	 */
+	public class DIListEntry {
+		public double key = Double.MAX_VALUE;
+		public IntArrayList value = null;
+		public DIListEntry next = null;
+
+		public DIListEntry(double ekey, IntArrayList evalue) {
+			key = ekey;
+			value = evalue;
+			next = null;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
new file mode 100644
index 0000000..33455a2
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.utils;
+
+import java.util.Arrays;
+
+/**
+ * This class provides a memory-efficient replacement for ArrayList<Integer> for
+ * restricted use cases.
+ * 
+ */
+public class IntArrayList 
+{
+	private static final int INIT_CAPACITY = 4;
+	private static final int RESIZE_FACTOR = 2;
+
+	private int[] _data = null;
+	private int _size = -1;
+	private int _val0 = -1;
+
+	public IntArrayList() {
+		_data = null;
+		_size = 0;
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public int size() {
+		return _size;
+	}
+
+	/**
+	 * 
+	 * @param value
+	 */
+	public void appendValue(int value) {
+		// embedded value (no array allocation)
+		if( _size == 0 ) {
+			_val0 = value;
+			_size = 1;
+			return;
+		}
+
+		// allocate or resize array if necessary
+		if( _data == null ) {
+			_data = new int[INIT_CAPACITY];
+			_data[0] = _val0;
+		} 
+		else if( _size + 1 >= _data.length ) {
+			resize();
+		}
+
+		// append value
+		_data[_size] = value;
+		_size++;
+	}
+
+	/**
+	 * 
+	 * @return
+	 */
+	public int[] extractValues() {
+		if( _size == 1 )
+			return new int[] { _val0 };
+		else
+			return Arrays.copyOfRange(_data, 0, _size);
+	}
+
+	/**
+	 * 
+	 */
+	private void resize() {
+		// check for integer overflow on resize
+		if( _data.length > Integer.MAX_VALUE / RESIZE_FACTOR )
+			throw new RuntimeException(
+					"IntArrayList resize leads to integer overflow: size=" + _size);
+
+		// resize data array and copy existing contents
+		int[] newdata = new int[_data.length * RESIZE_FACTOR];
+		System.arraycopy(_data, 0, newdata, 0, _size);
+		_data = newdata;
+	}
+}