You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/17 21:31:53 UTC
[3/7] incubator-systemml git commit: [SYSTEMML-766][SYSTEMML-810] Fix
missing licenses / unix line delimiters
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
index a76223c..2764664 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java
@@ -1,191 +1,191 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.math3.random.RandomDataGenerator;
-
-/**
- * Used for the finding columns to co-code
- *
- */
-public class PlanningBinPacker
-{
- private final float _binWeight;
- private final List<Integer> _items;
- private final List<Float> _itemWeights;
-
- public PlanningBinPacker(float binWeight, List<Integer> items, List<Float> itemWeights) {
- _binWeight = binWeight;
- _items = items;
- _itemWeights = itemWeights;
- }
-
- /**
- * NOTE: upper bound is 17/10 OPT
- *
- * @return key: available space, value: list of the bins that have that free space
- */
- public TreeMap<Float, List<List<Integer>>> packFirstFit() {
- return packFirstFit(_items, _itemWeights);
- }
-
- /**
- * shuffling the items to make some potential for having bins of different
- * sizes when consecutive columns are of close cardinalities
- *
- * @return key: available space, value: list of the bins that have that free
- * space
- */
- public TreeMap<Float, List<List<Integer>>> packFirstFitShuffled() {
- RandomDataGenerator rnd = new RandomDataGenerator();
- int[] permutation = rnd.nextPermutation(_items.size(), _items.size());
- List<Integer> shuffledItems = new ArrayList<Integer>(_items.size());
- List<Float> shuffledWeights = new ArrayList<Float>(_items.size());
- for (int ix : permutation) {
- shuffledItems.add(_items.get(ix));
- shuffledWeights.add(_itemWeights.get(ix));
- }
-
- return packFirstFit(shuffledItems, shuffledWeights);
- }
-
- /**
- *
- * @param items
- * @param itemWeights
- * @return
- */
- private TreeMap<Float, List<List<Integer>>> packFirstFit(List<Integer> items, List<Float> itemWeights)
- {
- // when searching for a bin, the first bin in the list is used
- TreeMap<Float, List<List<Integer>>> bins = new TreeMap<Float, List<List<Integer>>>();
- // first bin
- bins.put(_binWeight, createBinList());
- int numItems = items.size();
- for (int i = 0; i < numItems; i++) {
- float itemWeight = itemWeights.get(i);
- Map.Entry<Float, List<List<Integer>>> entry = bins
- .ceilingEntry(itemWeight);
- if (entry == null) {
- // new bin
- float newBinWeight = _binWeight - itemWeight;
- List<List<Integer>> binList = bins.get(newBinWeight);
- if (binList == null) {
- bins.put(newBinWeight, createBinList(items.get(i)));
- } else {
- List<Integer> newBin = new ArrayList<Integer>();
- newBin.add(items.get(i));
- binList.add(newBin);
- }
- } else {
- // add to the first bin in the list
- List<Integer> assignedBin = entry.getValue().remove(0);
- assignedBin.add(items.get(i));
- if (entry.getValue().size() == 0)
- bins.remove(entry.getKey());
- float newBinWeight = entry.getKey() - itemWeight;
- List<List<Integer>> newBinsList = bins.get(newBinWeight);
- if (newBinsList == null) {
- // new bin
- bins.put(newBinWeight, createBinList(assignedBin));
- } else {
- newBinsList.add(assignedBin);
- }
- }
- }
- return bins;
- }
-
- /**
- * NOTE: upper bound is 11/9 OPT + 6/9 (~1.22 OPT)
- *
- * @return
- */
- public TreeMap<Float, List<List<Integer>>> packFirstFitDescending() {
- // sort items descending based on their weights
- Integer[] indexes = new Integer[_items.size()];
- for (int i = 0; i < indexes.length; i++)
- indexes[i] = i;
- Arrays.sort(indexes, new Comparator<Integer>() {
-
- @Override
- public int compare(Integer o1, Integer o2) {
- return _itemWeights.get(o1).compareTo(_itemWeights.get(o2));
- }
- });
- List<Integer> sortedItems = new ArrayList<Integer>();
- List<Float> sortedItemWeights = new ArrayList<Float>();
- for (int i = indexes.length - 1; i >= 0; i--) {
- sortedItems.add(_items.get(i));
- sortedItemWeights.add(_itemWeights.get(i));
- }
- return packFirstFit(sortedItems, sortedItemWeights);
- }
-
- /**
- * NOTE: upper bound is 71/60 OPT + 6/9 (~1.18 OPT)
- *
- * @return
- */
- public TreeMap<Float, List<List<Integer>>> packModifiedFirstFitDescending() {
- throw new UnsupportedOperationException("Not implemented yet!");
- }
-
- /**
- *
- * @return
- */
- private List<List<Integer>> createBinList() {
- List<List<Integer>> binList = new ArrayList<List<Integer>>();
- binList.add(new ArrayList<Integer>());
- return binList;
- }
-
- /**
- *
- * @param item
- * @return
- */
- private List<List<Integer>> createBinList(int item) {
- List<List<Integer>> binList = new ArrayList<List<Integer>>();
- List<Integer> bin = new ArrayList<Integer>();
- binList.add(bin);
- bin.add(item);
- return binList;
- }
-
- /**
- *
- * @param bin
- * @return
- */
- private List<List<Integer>> createBinList(List<Integer> bin) {
- List<List<Integer>> binList = new ArrayList<List<Integer>>();
- binList.add(bin);
- return binList;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.commons.math3.random.RandomDataGenerator;
+
+/**
+ * Used for the finding columns to co-code
+ *
+ */
+public class PlanningBinPacker
+{
+ private final float _binWeight;
+ private final List<Integer> _items;
+ private final List<Float> _itemWeights;
+
+ public PlanningBinPacker(float binWeight, List<Integer> items, List<Float> itemWeights) {
+ _binWeight = binWeight;
+ _items = items;
+ _itemWeights = itemWeights;
+ }
+
+ /**
+ * NOTE: upper bound is 17/10 OPT
+ *
+ * @return key: available space, value: list of the bins that have that free space
+ */
+ public TreeMap<Float, List<List<Integer>>> packFirstFit() {
+ return packFirstFit(_items, _itemWeights);
+ }
+
+ /**
+ * shuffling the items to make some potential for having bins of different
+ * sizes when consecutive columns are of close cardinalities
+ *
+ * @return key: available space, value: list of the bins that have that free
+ * space
+ */
+ public TreeMap<Float, List<List<Integer>>> packFirstFitShuffled() {
+ RandomDataGenerator rnd = new RandomDataGenerator();
+ int[] permutation = rnd.nextPermutation(_items.size(), _items.size());
+ List<Integer> shuffledItems = new ArrayList<Integer>(_items.size());
+ List<Float> shuffledWeights = new ArrayList<Float>(_items.size());
+ for (int ix : permutation) {
+ shuffledItems.add(_items.get(ix));
+ shuffledWeights.add(_itemWeights.get(ix));
+ }
+
+ return packFirstFit(shuffledItems, shuffledWeights);
+ }
+
+ /**
+ *
+ * @param items
+ * @param itemWeights
+ * @return
+ */
+ private TreeMap<Float, List<List<Integer>>> packFirstFit(List<Integer> items, List<Float> itemWeights)
+ {
+ // when searching for a bin, the first bin in the list is used
+ TreeMap<Float, List<List<Integer>>> bins = new TreeMap<Float, List<List<Integer>>>();
+ // first bin
+ bins.put(_binWeight, createBinList());
+ int numItems = items.size();
+ for (int i = 0; i < numItems; i++) {
+ float itemWeight = itemWeights.get(i);
+ Map.Entry<Float, List<List<Integer>>> entry = bins
+ .ceilingEntry(itemWeight);
+ if (entry == null) {
+ // new bin
+ float newBinWeight = _binWeight - itemWeight;
+ List<List<Integer>> binList = bins.get(newBinWeight);
+ if (binList == null) {
+ bins.put(newBinWeight, createBinList(items.get(i)));
+ } else {
+ List<Integer> newBin = new ArrayList<Integer>();
+ newBin.add(items.get(i));
+ binList.add(newBin);
+ }
+ } else {
+ // add to the first bin in the list
+ List<Integer> assignedBin = entry.getValue().remove(0);
+ assignedBin.add(items.get(i));
+ if (entry.getValue().size() == 0)
+ bins.remove(entry.getKey());
+ float newBinWeight = entry.getKey() - itemWeight;
+ List<List<Integer>> newBinsList = bins.get(newBinWeight);
+ if (newBinsList == null) {
+ // new bin
+ bins.put(newBinWeight, createBinList(assignedBin));
+ } else {
+ newBinsList.add(assignedBin);
+ }
+ }
+ }
+ return bins;
+ }
+
+ /**
+ * NOTE: upper bound is 11/9 OPT + 6/9 (~1.22 OPT)
+ *
+ * @return
+ */
+ public TreeMap<Float, List<List<Integer>>> packFirstFitDescending() {
+ // sort items descending based on their weights
+ Integer[] indexes = new Integer[_items.size()];
+ for (int i = 0; i < indexes.length; i++)
+ indexes[i] = i;
+ Arrays.sort(indexes, new Comparator<Integer>() {
+
+ @Override
+ public int compare(Integer o1, Integer o2) {
+ return _itemWeights.get(o1).compareTo(_itemWeights.get(o2));
+ }
+ });
+ List<Integer> sortedItems = new ArrayList<Integer>();
+ List<Float> sortedItemWeights = new ArrayList<Float>();
+ for (int i = indexes.length - 1; i >= 0; i--) {
+ sortedItems.add(_items.get(i));
+ sortedItemWeights.add(_itemWeights.get(i));
+ }
+ return packFirstFit(sortedItems, sortedItemWeights);
+ }
+
+ /**
+ * NOTE: upper bound is 71/60 OPT + 6/9 (~1.18 OPT)
+ *
+ * @return
+ */
+ public TreeMap<Float, List<List<Integer>>> packModifiedFirstFitDescending() {
+ throw new UnsupportedOperationException("Not implemented yet!");
+ }
+
+ /**
+ *
+ * @return
+ */
+ private List<List<Integer>> createBinList() {
+ List<List<Integer>> binList = new ArrayList<List<Integer>>();
+ binList.add(new ArrayList<Integer>());
+ return binList;
+ }
+
+ /**
+ *
+ * @param item
+ * @return
+ */
+ private List<List<Integer>> createBinList(int item) {
+ List<List<Integer>> binList = new ArrayList<List<Integer>>();
+ List<Integer> bin = new ArrayList<Integer>();
+ binList.add(bin);
+ bin.add(item);
+ return binList;
+ }
+
+ /**
+ *
+ * @param bin
+ * @return
+ */
+ private List<List<Integer>> createBinList(List<Integer> bin) {
+ List<List<Integer>> binList = new ArrayList<List<Integer>>();
+ binList.add(bin);
+ return binList;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
index 221e4ca..6553c20 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java
@@ -1,104 +1,104 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
-import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
-
-/**
- * Class to represent information about co-coding a group of columns.
- *
- */
-public class PlanningCoCodingGroup
-{
- private int[] _colIndexes;
- private long _estSize;
- private float _cardRatio;
-
- /**
- * Constructor for a one-column group; i.e. do not co-code a given column.
- *
- */
- public PlanningCoCodingGroup(int col, long estSize, float cardRatio) {
- _colIndexes = new int[]{col};
- _estSize = estSize;
- _cardRatio = cardRatio;
- }
-
- /**
- * Constructor for merging two disjoint groups of columns
- *
- * @param grp1 first group of columns to merge
- * @param grp2 second group to merge
- * @param numRowsWeight numRows x sparsity
- */
- public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2,
- CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight)
- {
- // merge sorted non-empty arrays
- _colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length];
- int grp1Ptr = 0, grp2Ptr = 0;
- for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) {
- if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) {
- _colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++];
- if (grp1Ptr == grp1._colIndexes.length) {
- System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes,
- mergedIx + 1, grp2._colIndexes.length - grp2Ptr);
- break;
- }
- } else {
- _colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++];
- if (grp2Ptr == grp2._colIndexes.length) {
- System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes,
- mergedIx + 1, grp1._colIndexes.length - grp1Ptr);
- break;
- }
- }
- }
-
- // estimating size info
- CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator
- .estimateCompressedColGroupSize(_colIndexes);
- _estSize = groupSizeInfo.getMinSize();
- _cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight;
- }
-
- public int[] getColIndices() {
- return _colIndexes;
- }
-
- /**
- * @return estimated compressed size of the grouped columns
- */
- public long getEstSize() {
- return _estSize;
- }
-
- public float getCardinalityRatio() {
- return _cardRatio;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(_colIndexes);
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
+
+/**
+ * Class to represent information about co-coding a group of columns.
+ *
+ */
+public class PlanningCoCodingGroup
+{
+ private int[] _colIndexes;
+ private long _estSize;
+ private float _cardRatio;
+
+ /**
+ * Constructor for a one-column group; i.e. do not co-code a given column.
+ *
+ */
+ public PlanningCoCodingGroup(int col, long estSize, float cardRatio) {
+ _colIndexes = new int[]{col};
+ _estSize = estSize;
+ _cardRatio = cardRatio;
+ }
+
+ /**
+ * Constructor for merging two disjoint groups of columns
+ *
+ * @param grp1 first group of columns to merge
+ * @param grp2 second group to merge
+ * @param numRowsWeight numRows x sparsity
+ */
+ public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2,
+ CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight)
+ {
+ // merge sorted non-empty arrays
+ _colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length];
+ int grp1Ptr = 0, grp2Ptr = 0;
+ for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) {
+ if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) {
+ _colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++];
+ if (grp1Ptr == grp1._colIndexes.length) {
+ System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes,
+ mergedIx + 1, grp2._colIndexes.length - grp2Ptr);
+ break;
+ }
+ } else {
+ _colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++];
+ if (grp2Ptr == grp2._colIndexes.length) {
+ System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes,
+ mergedIx + 1, grp1._colIndexes.length - grp1Ptr);
+ break;
+ }
+ }
+ }
+
+ // estimating size info
+ CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator
+ .estimateCompressedColGroupSize(_colIndexes);
+ _estSize = groupSizeInfo.getMinSize();
+ _cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight;
+ }
+
+ public int[] getColIndices() {
+ return _colIndexes;
+ }
+
+ /**
+ * @return estimated compressed size of the grouped columns
+ */
+ public long getEstSize() {
+ return _estSize;
+ }
+
+ public float getCardinalityRatio() {
+ return _cardRatio;
+ }
+
+ @Override
+ public String toString() {
+ return Arrays.toString(_colIndexes);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
index 5e3c6c5..47d46d5 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java
@@ -1,73 +1,73 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
-
-/**
- * Internal data structure for tracking potential merges of column groups in
- * co-coding calculations.
- *
- */
-class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction>
-{
- private PlanningCoCodingGroup _leftGrp; //left input
- private PlanningCoCodingGroup _rightGrp; //right input
- private PlanningCoCodingGroup _mergedGrp; //output
- private long _changeInSize;
-
-
- public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator,
- float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) {
- _leftGrp = leftGrp;
- _rightGrp = rightGrp;
- _mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight);
-
- // Negative size change ==> Decrease in size
- _changeInSize = _mergedGrp.getEstSize()
- - leftGrp.getEstSize() - rightGrp.getEstSize();
- }
-
- public int compareTo(PlanningGroupMergeAction o) {
- // We only sort by the change in size
- return (int) Math.signum(_changeInSize - o._changeInSize);
- }
-
- @Override
- public String toString() {
- return String.format("Merge %s and %s", _leftGrp, _rightGrp);
- }
-
- public PlanningCoCodingGroup getLeftGrp() {
- return _leftGrp;
- }
-
- public PlanningCoCodingGroup getRightGrp() {
- return _rightGrp;
- }
-
- public PlanningCoCodingGroup getMergedGrp() {
- return _mergedGrp;
- }
-
- public long getChangeInSize() {
- return _changeInSize;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
+
+/**
+ * Internal data structure for tracking potential merges of column groups in
+ * co-coding calculations.
+ *
+ */
+class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction>
+{
+ private PlanningCoCodingGroup _leftGrp; //left input
+ private PlanningCoCodingGroup _rightGrp; //right input
+ private PlanningCoCodingGroup _mergedGrp; //output
+ private long _changeInSize;
+
+
+ public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator,
+ float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) {
+ _leftGrp = leftGrp;
+ _rightGrp = rightGrp;
+ _mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight);
+
+ // Negative size change ==> Decrease in size
+ _changeInSize = _mergedGrp.getEstSize()
+ - leftGrp.getEstSize() - rightGrp.getEstSize();
+ }
+
+ public int compareTo(PlanningGroupMergeAction o) {
+ // We only sort by the change in size
+ return (int) Math.signum(_changeInSize - o._changeInSize);
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Merge %s and %s", _leftGrp, _rightGrp);
+ }
+
+ public PlanningCoCodingGroup getLeftGrp() {
+ return _leftGrp;
+ }
+
+ public PlanningCoCodingGroup getRightGrp() {
+ return _rightGrp;
+ }
+
+ public PlanningCoCodingGroup getMergedGrp() {
+ return _mergedGrp;
+ }
+
+ public long getChangeInSize() {
+ return _changeInSize;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
index a37018f..bd3c13c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java
@@ -1,64 +1,64 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import org.apache.sysml.runtime.compress.utils.DblArray;
-
-/**
- * Base class for all column selection readers.
- *
- */
-public abstract class ReaderColumnSelection
-{
- protected int[] _colIndexes = null;
- protected int _numRows = -1;
- protected int _lastRow = -1;
- protected boolean _skipZeros = false;
-
- protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros) {
- _colIndexes = colIndexes;
- _numRows = numRows;
- _lastRow = -1;
- _skipZeros = skipZeros;
- }
-
- /**
- * Gets the next row, null when no more rows.
- *
- * @return
- */
- public abstract DblArray nextRow();
-
- /**
- *
- * @return
- */
- public int getCurrentRowIndex() {
- return _lastRow;
- }
-
-
- /**
- * Resets the reader to the first row.
- */
- public void reset() {
- _lastRow = -1;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+
+/**
+ * Base class for all column selection readers.
+ *
+ */
+public abstract class ReaderColumnSelection
+{
+ protected int[] _colIndexes = null;
+ protected int _numRows = -1;
+ protected int _lastRow = -1;
+ protected boolean _skipZeros = false;
+
+ protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros) {
+ _colIndexes = colIndexes;
+ _numRows = numRows;
+ _lastRow = -1;
+ _skipZeros = skipZeros;
+ }
+
+ /**
+ * Gets the next row, null when no more rows.
+ *
+ * @return
+ */
+ public abstract DblArray nextRow();
+
+ /**
+ *
+ * @return
+ */
+ public int getCurrentRowIndex() {
+ return _lastRow;
+ }
+
+
+ /**
+ * Resets the reader to the first row.
+ */
+ public void reset() {
+ _lastRow = -1;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
index d22f39d..c39eeef 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java
@@ -1,68 +1,68 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import org.apache.sysml.runtime.compress.utils.DblArray;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-
-public class ReaderColumnSelectionDense extends ReaderColumnSelection
-{
- protected MatrixBlock _data;
-
- // reusable return
- private DblArray nonZeroReturn;
- private DblArray reusableReturn;
- private double[] reusableArr;
-
- public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros) {
- super(colIndices, CompressedMatrixBlock.TRANSPOSE_INPUT ?
- data.getNumColumns() : data.getNumRows(), skipZeros);
- _data = data;
- reusableArr = new double[colIndices.length];
- reusableReturn = new DblArray(reusableArr);
- }
-
- @Override
- public DblArray nextRow() {
- if( _skipZeros) {
- while ((nonZeroReturn = getNextRow()) != null
- && DblArray.isZero(nonZeroReturn));
- return nonZeroReturn;
- } else {
- return getNextRow();
- }
- }
-
- /**
- *
- * @return
- */
- private DblArray getNextRow() {
- if(_lastRow == _numRows-1)
- return null;
- _lastRow++;
- for (int i = 0; i < _colIndexes.length; i++) {
- reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ?
- _data.quickGetValue( _colIndexes[i], _lastRow ) :
- _data.quickGetValue( _lastRow, _colIndexes[i] );
- }
- return reusableReturn;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+public class ReaderColumnSelectionDense extends ReaderColumnSelection
+{
+ protected MatrixBlock _data;
+
+ // reusable return
+ private DblArray nonZeroReturn;
+ private DblArray reusableReturn;
+ private double[] reusableArr;
+
+ public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros) {
+ super(colIndices, CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ data.getNumColumns() : data.getNumRows(), skipZeros);
+ _data = data;
+ reusableArr = new double[colIndices.length];
+ reusableReturn = new DblArray(reusableArr);
+ }
+
+ @Override
+ public DblArray nextRow() {
+ if( _skipZeros) {
+ while ((nonZeroReturn = getNextRow()) != null
+ && DblArray.isZero(nonZeroReturn));
+ return nonZeroReturn;
+ } else {
+ return getNextRow();
+ }
+ }
+
+ /**
+ *
+ * @return
+ */
+ private DblArray getNextRow() {
+ if(_lastRow == _numRows-1)
+ return null;
+ _lastRow++;
+ for (int i = 0; i < _colIndexes.length; i++) {
+ reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ _data.quickGetValue( _colIndexes[i], _lastRow ) :
+ _data.quickGetValue( _lastRow, _colIndexes[i] );
+ }
+ return reusableReturn;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
index 06518e4..d8e50c0 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java
@@ -1,86 +1,86 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import org.apache.sysml.runtime.compress.utils.DblArray;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-
-/**
- *
- * considers only a subset of row indexes
- */
-public class ReaderColumnSelectionDenseSample extends ReaderColumnSelection
-{
- protected MatrixBlock _data;
-
- private int[] _sampleIndexes;
- private int lastIndex = -1;
-
- // reusable return
- private DblArray nonZeroReturn;
- private DblArray reusableReturn;
- private double[] reusableArr;
-
- public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes, boolean skipZeros)
- {
- super(colIndexes, -1, skipZeros);
- _data = data;
- _sampleIndexes = sampleIndexes;
- reusableArr = new double[colIndexes.length];
- reusableReturn = new DblArray(reusableArr);
- }
-
- @Override
- public DblArray nextRow() {
- if (_skipZeros) {
- while ((nonZeroReturn = getNextRow()) != null
- && DblArray.isZero(nonZeroReturn));
- return nonZeroReturn;
- } else {
- return getNextRow();
- }
- }
-
- /**
- *
- * @return
- */
- private DblArray getNextRow() {
- if (lastIndex == _sampleIndexes.length - 1)
- return null;
- lastIndex++;
- for (int i = 0; i < _colIndexes.length; i++) {
- reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ?
- _data.quickGetValue(_colIndexes[i], _sampleIndexes[lastIndex]) :
- _data.quickGetValue(_sampleIndexes[lastIndex], _colIndexes[i]);
- }
- return reusableReturn;
- }
-
- @Override
- public int getCurrentRowIndex() {
- return _sampleIndexes[lastIndex];
- }
-
- @Override
- public void reset() {
- lastIndex = -1;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ *
+ * considers only a subset of row indexes
+ */
+public class ReaderColumnSelectionDenseSample extends ReaderColumnSelection
+{
+ protected MatrixBlock _data;
+
+ private int[] _sampleIndexes;
+ private int lastIndex = -1;
+
+ // reusable return
+ private DblArray nonZeroReturn;
+ private DblArray reusableReturn;
+ private double[] reusableArr;
+
+ public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes, boolean skipZeros)
+ {
+ super(colIndexes, -1, skipZeros);
+ _data = data;
+ _sampleIndexes = sampleIndexes;
+ reusableArr = new double[colIndexes.length];
+ reusableReturn = new DblArray(reusableArr);
+ }
+
+ @Override
+ public DblArray nextRow() {
+ if (_skipZeros) {
+ while ((nonZeroReturn = getNextRow()) != null
+ && DblArray.isZero(nonZeroReturn));
+ return nonZeroReturn;
+ } else {
+ return getNextRow();
+ }
+ }
+
+ /**
+ *
+ * @return
+ */
+ private DblArray getNextRow() {
+ if (lastIndex == _sampleIndexes.length - 1)
+ return null;
+ lastIndex++;
+ for (int i = 0; i < _colIndexes.length; i++) {
+ reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ _data.quickGetValue(_colIndexes[i], _sampleIndexes[lastIndex]) :
+ _data.quickGetValue(_sampleIndexes[lastIndex], _colIndexes[i]);
+ }
+ return reusableReturn;
+ }
+
+ @Override
+ public int getCurrentRowIndex() {
+ return _sampleIndexes[lastIndex];
+ }
+
+ @Override
+ public void reset() {
+ lastIndex = -1;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
index d2ef5a4..c6adaee 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java
@@ -1,115 +1,115 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.compress.utils.DblArray;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.SparseRow;
-
-/**
- * Used to extract the values at certain indexes from each row in a sparse
- * matrix
- *
- * Keeps returning all-zeros arrays until reaching the last possible index. The
- * current compression algorithm treats the zero-value in a sparse matrix like
- * any other value.
- */
-public class ReaderColumnSelectionSparse extends ReaderColumnSelection
-{
- private final DblArray ZERO_DBL_ARRAY;
- private DblArray nonZeroReturn;
-
- // reusable return
- private DblArray reusableReturn;
- private double[] reusableArr;
-
- // current sparse row positions
- private SparseRow[] sparseCols = null;
- private int[] sparsePos = null;
-
- public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros)
- {
- super(colIndexes, CompressedMatrixBlock.TRANSPOSE_INPUT ?
- data.getNumColumns() : data.getNumRows(), skipZeros);
- ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true);
- reusableArr = new double[colIndexes.length];
- reusableReturn = new DblArray(reusableArr);
-
- if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
- throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
- }
-
- sparseCols = new SparseRow[colIndexes.length];
- sparsePos = new int[colIndexes.length];
- if( data.getSparseBlock()!=null )
- for( int i=0; i<colIndexes.length; i++ )
- sparseCols[i] = data.getSparseBlock().get(colIndexes[i]);
- Arrays.fill(sparsePos, 0);
- }
-
- @Override
- public DblArray nextRow() {
- if(_skipZeros) {
- while ((nonZeroReturn = getNextRow()) != null
- && nonZeroReturn == ZERO_DBL_ARRAY);
- return nonZeroReturn;
- } else {
- return getNextRow();
- }
- }
-
- /**
- *
- * @return
- */
- private DblArray getNextRow()
- {
- if(_lastRow == _numRows-1)
- return null;
- _lastRow++;
-
- if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
- throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
- }
-
- //move pos to current row if necessary (for all columns)
- for( int i=0; i<_colIndexes.length; i++ )
- if( sparseCols[i] != null && (sparseCols[i].indexes().length<=sparsePos[i]
- || sparseCols[i].indexes()[sparsePos[i]]<_lastRow) )
- {
- sparsePos[i]++;
- }
-
- //extract current values
- Arrays.fill(reusableArr, 0);
- boolean zeroResult = true;
- for( int i=0; i<_colIndexes.length; i++ )
- if( sparseCols[i] != null && sparseCols[i].indexes().length>sparsePos[i]
- &&sparseCols[i].indexes()[sparsePos[i]]==_lastRow )
- {
- reusableArr[i] = sparseCols[i].values()[sparsePos[i]];
- zeroResult = false;
- }
-
- return zeroResult ? ZERO_DBL_ARRAY : reusableReturn;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseRow;
+
+/**
+ * Used to extract the values at certain indexes from each row in a sparse
+ * matrix
+ *
+ * Keeps returning all-zeros arrays until reaching the last possible index. The
+ * current compression algorithm treats the zero-value in a sparse matrix like
+ * any other value.
+ */
+public class ReaderColumnSelectionSparse extends ReaderColumnSelection
+{
+ private final DblArray ZERO_DBL_ARRAY;
+ private DblArray nonZeroReturn;
+
+ // reusable return
+ private DblArray reusableReturn;
+ private double[] reusableArr;
+
+ // current sparse row positions
+ private SparseRow[] sparseCols = null;
+ private int[] sparsePos = null;
+
+ public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros)
+ {
+ super(colIndexes, CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ data.getNumColumns() : data.getNumRows(), skipZeros);
+ ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true);
+ reusableArr = new double[colIndexes.length];
+ reusableReturn = new DblArray(reusableArr);
+
+ if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
+ throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
+ }
+
+ sparseCols = new SparseRow[colIndexes.length];
+ sparsePos = new int[colIndexes.length];
+ if( data.getSparseBlock()!=null )
+ for( int i=0; i<colIndexes.length; i++ )
+ sparseCols[i] = data.getSparseBlock().get(colIndexes[i]);
+ Arrays.fill(sparsePos, 0);
+ }
+
+ @Override
+ public DblArray nextRow() {
+ if(_skipZeros) {
+ while ((nonZeroReturn = getNextRow()) != null
+ && nonZeroReturn == ZERO_DBL_ARRAY);
+ return nonZeroReturn;
+ } else {
+ return getNextRow();
+ }
+ }
+
+ /**
+ *
+ * @return
+ */
+ private DblArray getNextRow()
+ {
+ if(_lastRow == _numRows-1)
+ return null;
+ _lastRow++;
+
+ if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){
+ throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input.");
+ }
+
+ //move pos to current row if necessary (for all columns)
+ for( int i=0; i<_colIndexes.length; i++ )
+ if( sparseCols[i] != null && (sparseCols[i].indexes().length<=sparsePos[i]
+ || sparseCols[i].indexes()[sparsePos[i]]<_lastRow) )
+ {
+ sparsePos[i]++;
+ }
+
+ //extract current values
+ Arrays.fill(reusableArr, 0);
+ boolean zeroResult = true;
+ for( int i=0; i<_colIndexes.length; i++ )
+ if( sparseCols[i] != null && sparseCols[i].indexes().length>sparsePos[i]
+ &&sparseCols[i].indexes()[sparsePos[i]]==_lastRow )
+ {
+ reusableArr[i] = sparseCols[i].values()[sparsePos[i]];
+ zeroResult = false;
+ }
+
+ return zeroResult ? ZERO_DBL_ARRAY : reusableReturn;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
index 971f438..84a2c6f 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java
@@ -1,101 +1,101 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry;
-import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
-
-/**
- * Uncompressed representation of one or more columns in bitmap format.
- *
- */
-public final class UncompressedBitmap
-{
- private int _numCols;
-
- /** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/
- private double[] _values;
-
- /** Bitmaps (as lists of offsets) for each of the values. */
- private int[][] _offsetsLists;
-
- public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns )
- {
- // added for one pass bitmap construction
- // Convert inputs to arrays
- int numVals = distinctVals.size();
- _values = new double[numVals*numColumns];
- _offsetsLists = new int[numVals][];
- int bitmapIx = 0;
- for( DArrayIListEntry val : distinctVals.extractValues()) {
- System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns);
- _offsetsLists[bitmapIx++] = val.value.extractValues();
- }
- _numCols = numColumns;
- }
-
- public UncompressedBitmap( DoubleIntListHashMap distinctVals )
- {
- // added for one pass bitmap construction
- // Convert inputs to arrays
- int numVals = distinctVals.size();
- _values = new double[numVals];
- _offsetsLists = new int[numVals][];
- int bitmapIx = 0;
- for(DIListEntry val : distinctVals.extractValues()) {
- _values[bitmapIx] = val.key;
- _offsetsLists[bitmapIx++] = val.value.extractValues();
- }
- _numCols = 1;
- }
-
- public int getNumColumns() {
- return _numCols;
- }
-
- /**
- * @param ix index of a particular distinct value
- * @return the tuple of column values associated with the specified index
- */
- public double[] getValues(int ix) {
- return Arrays.copyOfRange(_values, ix*_numCols, (ix+1)*_numCols);
- }
-
- /**
- * @return number of distinct values in the column; this number is also the
- * number of bitmaps, since there is one bitmap per value
- */
- public int getNumValues() {
- return _values.length / _numCols;
- }
-
- /**
- * @param ix index of a particular distinct value
- * @return IMMUTABLE array of the offsets of the rows containing the value
- * with the indicated index
- */
- public int[] getOffsetsList(int ix) {
- return _offsetsLists[ix];
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
+
+/**
+ * Uncompressed representation of one or more columns in bitmap format.
+ *
+ */
+public final class UncompressedBitmap
+{
+ private int _numCols;
+
+ /** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/
+ private double[] _values;
+
+ /** Bitmaps (as lists of offsets) for each of the values. */
+ private int[][] _offsetsLists;
+
+ public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns )
+ {
+ // added for one pass bitmap construction
+ // Convert inputs to arrays
+ int numVals = distinctVals.size();
+ _values = new double[numVals*numColumns];
+ _offsetsLists = new int[numVals][];
+ int bitmapIx = 0;
+ for( DArrayIListEntry val : distinctVals.extractValues()) {
+ System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns);
+ _offsetsLists[bitmapIx++] = val.value.extractValues();
+ }
+ _numCols = numColumns;
+ }
+
+ public UncompressedBitmap( DoubleIntListHashMap distinctVals )
+ {
+ // added for one pass bitmap construction
+ // Convert inputs to arrays
+ int numVals = distinctVals.size();
+ _values = new double[numVals];
+ _offsetsLists = new int[numVals][];
+ int bitmapIx = 0;
+ for(DIListEntry val : distinctVals.extractValues()) {
+ _values[bitmapIx] = val.key;
+ _offsetsLists[bitmapIx++] = val.value.extractValues();
+ }
+ _numCols = 1;
+ }
+
+ public int getNumColumns() {
+ return _numCols;
+ }
+
+ /**
+ * @param ix index of a particular distinct value
+ * @return the tuple of column values associated with the specified index
+ */
+ public double[] getValues(int ix) {
+ return Arrays.copyOfRange(_values, ix*_numCols, (ix+1)*_numCols);
+ }
+
+ /**
+ * @return number of distinct values in the column; this number is also the
+ * number of bitmaps, since there is one bitmap per value
+ */
+ public int getNumValues() {
+ return _values.length / _numCols;
+ }
+
+ /**
+ * @param ix index of a particular distinct value
+ * @return IMMUTABLE array of the offsets of the rows containing the value
+ * with the indicated index
+ */
+ public int[] getOffsetsList(int ix) {
+ return _offsetsLists[ix];
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
index 1a1ae55..8d0a40b 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java
@@ -1,145 +1,145 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress.estim;
-
-import org.apache.sysml.runtime.compress.BitmapEncoder;
-import org.apache.sysml.runtime.compress.UncompressedBitmap;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-
-/**
- * Base class for all compressed size estimators
- */
-public abstract class CompressedSizeEstimator
-{
- protected MatrixBlock _data;
-
- public CompressedSizeEstimator(MatrixBlock data) {
- _data = data;
- }
-
- /**
- *
- * @param colIndexes
- * @return
- */
- public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes);
-
- /**
- *
- * @param ubm
- * @return
- */
- public abstract CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm);
-
- /**
- *
- * @param ubm
- * @param inclRLE
- * @return
- */
- protected SizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm, boolean inclRLE) {
- int numVals = ubm.getNumValues();
- int numRuns = 0;
- int numOffs = 0;
- int numSegs = 0;
- int numSingle = 0;
-
- //compute size estimation factors
- for (int i = 0; i < numVals; i++) {
- int[] list = ubm.getOffsetsList(i);
- numOffs += list.length;
- numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1;
- numSingle += (list.length==1) ? 1 : 0;
- if( inclRLE ) {
- int lastOff = -2;
- for (int j = 0; j < list.length; j++) {
- if (list[j] != lastOff + 1)
- numRuns++;
- lastOff = list[j];
- }
- }
- }
-
- //construct estimation factors
- return new SizeEstimationFactors(numVals, numSegs, numOffs, numRuns, numSingle);
- }
-
- /**
- * Estimates the number of bytes needed to encode this column group
- * in RLE encoding format.
- *
- * @param numVals
- * @param numRuns
- * @param numCols
- * @return
- */
- protected static long getRLESize(int numVals, int numRuns, int numCols) {
- int ret = 0;
- //distinct value tuples [double per col]
- ret += 8 * numVals * numCols;
- //offset/len fields per distinct value tuple [2xint]
- ret += 8 * numVals;
- //run data [2xchar]
- ret += 4 * numRuns;
- return ret;
- }
-
- /**
- * Estimates the number of bytes needed to encode this column group
- * in OLE format.
- *
- * @param numVals
- * @param numOffs
- * @param numSeqs
- * @param numCols
- * @return
- */
- protected static long getOLESize(int numVals, float numOffs, int numSeqs, int numCols) {
- int ret = 0;
- //distinct value tuples [double per col]
- ret += 8 * numVals * numCols;
- //offset/len fields per distinct value tuple [2xint]
- ret += 8 * numVals;
- //offset list data [1xchar]
- ret += 2 * numOffs;
- //offset list seqment headers [1xchar]
- ret += 2 * numSeqs;
- return ret;
- }
-
- /**
- *
- */
- protected static class SizeEstimationFactors {
- protected int numVals; //num value tuples
- protected int numSegs; //num OLE segments
- protected int numOffs; //num OLE offsets
- protected int numRuns; //num RLE runs
- protected int numSingle; //num singletons
-
- protected SizeEstimationFactors(int numvals, int numsegs, int numoffs, int numruns, int numsingle) {
- numVals = numvals;
- numSegs = numsegs;
- numOffs = numoffs;
- numRuns = numruns;
- numSingle = numsingle;
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.UncompressedBitmap;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Base class for all compressed size estimators
+ */
+public abstract class CompressedSizeEstimator
+{
+ protected MatrixBlock _data;
+
+ public CompressedSizeEstimator(MatrixBlock data) {
+ _data = data;
+ }
+
+ /**
+ *
+ * @param colIndexes
+ * @return
+ */
+ public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes);
+
+ /**
+ *
+ * @param ubm
+ * @return
+ */
+ public abstract CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm);
+
+ /**
+ *
+ * @param ubm
+ * @param inclRLE
+ * @return
+ */
+ protected SizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm, boolean inclRLE) {
+ int numVals = ubm.getNumValues();
+ int numRuns = 0;
+ int numOffs = 0;
+ int numSegs = 0;
+ int numSingle = 0;
+
+ //compute size estimation factors
+ for (int i = 0; i < numVals; i++) {
+ int[] list = ubm.getOffsetsList(i);
+ numOffs += list.length;
+ numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1;
+ numSingle += (list.length==1) ? 1 : 0;
+ if( inclRLE ) {
+ int lastOff = -2;
+ for (int j = 0; j < list.length; j++) {
+ if (list[j] != lastOff + 1)
+ numRuns++;
+ lastOff = list[j];
+ }
+ }
+ }
+
+ //construct estimation factors
+ return new SizeEstimationFactors(numVals, numSegs, numOffs, numRuns, numSingle);
+ }
+
+ /**
+ * Estimates the number of bytes needed to encode this column group
+ * in RLE encoding format.
+ *
+ * @param numVals
+ * @param numRuns
+ * @param numCols
+ * @return
+ */
+ protected static long getRLESize(int numVals, int numRuns, int numCols) {
+ int ret = 0;
+ //distinct value tuples [double per col]
+ ret += 8 * numVals * numCols;
+ //offset/len fields per distinct value tuple [2xint]
+ ret += 8 * numVals;
+ //run data [2xchar]
+ ret += 4 * numRuns;
+ return ret;
+ }
+
+ /**
+ * Estimates the number of bytes needed to encode this column group
+ * in OLE format.
+ *
+ * @param numVals
+ * @param numOffs
+ * @param numSeqs
+ * @param numCols
+ * @return
+ */
+ protected static long getOLESize(int numVals, float numOffs, int numSeqs, int numCols) {
+ int ret = 0;
+ //distinct value tuples [double per col]
+ ret += 8 * numVals * numCols;
+ //offset/len fields per distinct value tuple [2xint]
+ ret += 8 * numVals;
+ //offset list data [1xchar]
+ ret += 2 * numOffs;
+ //offset list seqment headers [1xchar]
+ ret += 2 * numSeqs;
+ return ret;
+ }
+
+ /**
+ *
+ */
+ protected static class SizeEstimationFactors {
+ protected int numVals; //num value tuples
+ protected int numSegs; //num OLE segments
+ protected int numOffs; //num OLE offsets
+ protected int numRuns; //num RLE runs
+ protected int numSingle; //num singletons
+
+ protected SizeEstimationFactors(int numvals, int numsegs, int numoffs, int numruns, int numsingle) {
+ numVals = numvals;
+ numSegs = numsegs;
+ numOffs = numoffs;
+ numRuns = numruns;
+ numSingle = numsingle;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
index 557c518..d24255d 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java
@@ -1,53 +1,53 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress.estim;
-
-import org.apache.sysml.runtime.compress.BitmapEncoder;
-import org.apache.sysml.runtime.compress.UncompressedBitmap;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-
-/**
- * Exact compressed size estimator (examines entire dataset).
- *
- */
-public class CompressedSizeEstimatorExact extends CompressedSizeEstimator
-{
- public CompressedSizeEstimatorExact(MatrixBlock data) {
- super(data);
- }
-
- @Override
- public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) {
- return estimateCompressedColGroupSize(
- BitmapEncoder.extractBitmap(colIndexes, _data));
- }
-
- @Override
- public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm)
- {
- //compute size estimation factors
- SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
-
- //construct new size info summary
- return new CompressedSizeInfo(fact.numVals,
- getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
- getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress.estim;
+
+import org.apache.sysml.runtime.compress.BitmapEncoder;
+import org.apache.sysml.runtime.compress.UncompressedBitmap;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Exact compressed size estimator (examines entire dataset).
+ *
+ */
+public class CompressedSizeEstimatorExact extends CompressedSizeEstimator
+{
+ public CompressedSizeEstimatorExact(MatrixBlock data) {
+ super(data);
+ }
+
+ @Override
+ public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) {
+ return estimateCompressedColGroupSize(
+ BitmapEncoder.extractBitmap(colIndexes, _data));
+ }
+
+ @Override
+ public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm)
+ {
+ //compute size estimation factors
+ SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true);
+
+ //construct new size info summary
+ return new CompressedSizeInfo(fact.numVals,
+ getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()),
+ getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns()));
+ }
+}