You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/17 21:31:57 UTC
[7/7] incubator-systemml git commit: [SYSTEMML-766][SYSTEMML-810] Fix
missing licenses / unix line delimiters
[SYSTEMML-766][SYSTEMML-810] Fix missing licenses / unix line delimiters
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/da318739
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/da318739
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/da318739
Branch: refs/heads/master
Commit: da3187392cb67e8a5bd3a494fae68b8dfbc0d883
Parents: 2b7fdb2
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Jul 17 14:31:27 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Jul 17 14:31:27 2016 -0700
----------------------------------------------------------------------
.../runtime/compress/BitmapDecoderOLE.java | 254 +-
.../runtime/compress/BitmapDecoderRLE.java | 234 +-
.../sysml/runtime/compress/BitmapEncoder.java | 784 ++---
.../apache/sysml/runtime/compress/ColGroup.java | 518 ++--
.../sysml/runtime/compress/ColGroupBitmap.java | 996 +++----
.../sysml/runtime/compress/ColGroupOLE.java | 1268 ++++-----
.../sysml/runtime/compress/ColGroupRLE.java | 1234 ++++----
.../runtime/compress/ColGroupUncompressed.java | 720 ++---
.../runtime/compress/CompressedMatrixBlock.java | 2684 +++++++++---------
.../runtime/compress/PlanningBinPacker.java | 382 +--
.../runtime/compress/PlanningCoCodingGroup.java | 206 +-
.../compress/PlanningGroupMergeAction.java | 146 +-
.../runtime/compress/ReaderColumnSelection.java | 128 +-
.../compress/ReaderColumnSelectionDense.java | 136 +-
.../ReaderColumnSelectionDenseSample.java | 172 +-
.../compress/ReaderColumnSelectionSparse.java | 230 +-
.../runtime/compress/UncompressedBitmap.java | 202 +-
.../compress/estim/CompressedSizeEstimator.java | 290 +-
.../estim/CompressedSizeEstimatorExact.java | 106 +-
.../estim/CompressedSizeEstimatorSample.java | 1534 +++++-----
.../compress/estim/CompressedSizeInfo.java | 138 +-
.../sysml/runtime/compress/utils/DblArray.java | 182 +-
.../compress/utils/DblArrayIntListHashMap.java | 358 +--
.../compress/utils/DoubleIntListHashMap.java | 362 +--
.../compress/utils/LinearAlgebraUtils.java | 766 ++---
.../instructions/cp/PlusMultCPInstruction.java | 19 +
src/test/scripts/functions/compress/LinregCG.R | 72 +-
.../scripts/functions/compress/LinregCG.dml | 70 +-
28 files changed, 7105 insertions(+), 7086 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
index fc6e861..539d1f5 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
@@ -1,127 +1,127 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Arrays;
-import java.util.Iterator;
-
-/**
- * General-purpose iterator to decode a compressed OLE bitmap.
- *
- */
-public final class BitmapDecoderOLE implements Iterator<Integer>
-{
- // pointer to the compressed bitmap
- private int _bmOff;
- private int _bmLen;
- private char[] _bmPtr;
-
- // The index of the current block. Block 0 covers bits 1 through 2^16
- private int _blockIx;
-
- // The offset where the current block starts within the bitmap.
- private int _blockStartOffset;
-
- // The number of offsets in the current block.
- private int _curBlockSize;
-
- // The offset <b>in the current block</b> the <b>next</b> element we will
- // read from the bitmap, or bmPtr.length if we are done.
- private int _nextBmOffset;
-
- /**
- * Point this object at the beginning of a particular bitmap. After a call
- * to this method, the next call to {@link #nextOffset()} will return the
- * offset of the first bit in the specified bitmap.
- *
- * @param bmPtr
- * pointer to a compressed bitmap
- */
- public BitmapDecoderOLE(char[] bmPtr, int off, int len) {
- _bmOff = off;
- _bmLen = len;
- _bmPtr = bmPtr;
- _blockIx = 0;
- _blockStartOffset = 0;
- _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
- if (_curBlockSize < 0) {
- throw new RuntimeException(String.format(
- "Negative block size %d at position %d of %s",
- _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr)));
- }
- _nextBmOffset = 0;
-
- // Advance past any zero-length blocks at the beginning of the array
- while (_blockStartOffset < _bmLen
- && _nextBmOffset >= _curBlockSize) {
- advanceToNextBlock();
- }
- }
-
- @Override
- public Integer next() {
- if( !hasNext() )
- throw new RuntimeException("No next offset existing.");
-
- // Grab the lookahead value Note the +1 in the array indexing;
- // the first number in a block is the block size
- int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset];
- int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ)
- + offsetFromBlockBegin;
- _nextBmOffset++;
-
- // Advance to next non-empty block if we reached the end of the block.
- while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) {
- advanceToNextBlock();
- }
-
- return ret;
- }
-
- @Override
- public boolean hasNext() {
- return _blockStartOffset < _bmLen;
- }
-
- @Override
- public void remove() {
- throw new RuntimeException("Not implemented for BitmapDecoderOLE.");
- }
-
- /**
- * Move forward to the next block. Does not skip empty blocks.
- */
- private void advanceToNextBlock() {
- _blockStartOffset += (1 + _curBlockSize);
- _blockIx++;
- if (_blockStartOffset >= _bmLen) {
- // Read past last block
- return;
- }
-
- _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
- if (_curBlockSize < 0) {
- throw new RuntimeException(String.format(
- "Negative block size %d at position %d of %s",
- _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr)));
- }
- _nextBmOffset = 0;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ *
+ */
+public final class BitmapDecoderOLE implements Iterator<Integer>
+{
+ // pointer to the compressed bitmap
+ private int _bmOff;
+ private int _bmLen;
+ private char[] _bmPtr;
+
+ // The index of the current block. Block 0 covers bits 1 through 2^16
+ private int _blockIx;
+
+ // The offset where the current block starts within the bitmap.
+ private int _blockStartOffset;
+
+ // The number of offsets in the current block.
+ private int _curBlockSize;
+
+ // The offset <b>in the current block</b> the <b>next</b> element we will
+ // read from the bitmap, or bmPtr.length if we are done.
+ private int _nextBmOffset;
+
+ /**
+ * Point this object at the beginning of a particular bitmap. After a call
+ * to this method, the next call to {@link #nextOffset()} will return the
+ * offset of the first bit in the specified bitmap.
+ *
+ * @param bmPtr
+ * pointer to a compressed bitmap
+ */
+ public BitmapDecoderOLE(char[] bmPtr, int off, int len) {
+ _bmOff = off;
+ _bmLen = len;
+ _bmPtr = bmPtr;
+ _blockIx = 0;
+ _blockStartOffset = 0;
+ _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+ if (_curBlockSize < 0) {
+ throw new RuntimeException(String.format(
+ "Negative block size %d at position %d of %s",
+ _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr)));
+ }
+ _nextBmOffset = 0;
+
+ // Advance past any zero-length blocks at the beginning of the array
+ while (_blockStartOffset < _bmLen
+ && _nextBmOffset >= _curBlockSize) {
+ advanceToNextBlock();
+ }
+ }
+
+ @Override
+ public Integer next() {
+ if( !hasNext() )
+ throw new RuntimeException("No next offset existing.");
+
+ // Grab the lookahead value Note the +1 in the array indexing;
+ // the first number in a block is the block size
+ int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset];
+ int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ)
+ + offsetFromBlockBegin;
+ _nextBmOffset++;
+
+ // Advance to next non-empty block if we reached the end of the block.
+ while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) {
+ advanceToNextBlock();
+ }
+
+ return ret;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return _blockStartOffset < _bmLen;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented for BitmapDecoderOLE.");
+ }
+
+ /**
+ * Move forward to the next block. Does not skip empty blocks.
+ */
+ private void advanceToNextBlock() {
+ _blockStartOffset += (1 + _curBlockSize);
+ _blockIx++;
+ if (_blockStartOffset >= _bmLen) {
+ // Read past last block
+ return;
+ }
+
+ _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+ if (_curBlockSize < 0) {
+ throw new RuntimeException(String.format(
+ "Negative block size %d at position %d of %s",
+ _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr)));
+ }
+ _nextBmOffset = 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
index 54f24ae..f2e05a4 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
@@ -1,117 +1,117 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.Iterator;
-
-/**
- * General-purpose iterator to decode a compressed OLE bitmap.
- *
- */
-public final class BitmapDecoderRLE implements Iterator<Integer>
-{
- // pointer to the compressed bitmap
- private int _bmOff;
- private int _bmLen;
- private char[] _bmPtr;
-
- // The offset of the <b>next</b> element we will read from the bitmap, or
- // bmPtr.length if we are done.
- private int _nextBmOffset;
-
- // The offset in the matrix column of the beginning of the current run
- private int _runStartOffset;
-
- // The length of the current run
- private int _curRunLen;
-
- // The number of bits that we have returned from the current run.
- private int _runBitsReturned;
-
- /**
- * Point this object at the beginning of a particular bitmap. After a call
- * to this method, the next call to {@link #nextOffset()} will return the
- * offset of the first bit in the specified bitmap.
- *
- * @param bmPtr
- * pointer to a compressed bitmap
- */
- public BitmapDecoderRLE(char[] bmPtr, int off, int len) {
- _bmOff = off;
- _bmLen = len;
- _bmPtr = bmPtr;
- _nextBmOffset = 0;
- _runStartOffset = 0;
- _curRunLen = 0;
- _runBitsReturned = 0;
-
- if (0 == _bmLen) {
- return; //no runs
- }
-
- // Advance to the beginning of the first non-empty run.
- advanceToNextRun();
- }
-
- @Override
- public Integer next() {
- if( !hasNext() )
- throw new RuntimeException("No next offset existing.");
-
- // Grab the lookahead value
- int ret = _runStartOffset + _runBitsReturned;
-
- _runBitsReturned++;
-
- // Check for end of run
- if (_runBitsReturned == _curRunLen) {
- advanceToNextRun();
- }
-
- return ret;
- }
-
- @Override
- public boolean hasNext() {
- return _runBitsReturned < _curRunLen;
- }
-
- @Override
- public void remove() {
- throw new RuntimeException("Not implemented for BitmapDecoderRLE.");
- }
-
- /** Move forward to the next non-empty run. */
- private void advanceToNextRun() {
- // While loop needed because some runs are of length 0
- while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) {
-
- _runBitsReturned = 0;
-
- // Read the distance to the next run
- char delta = _bmPtr[_bmOff + _nextBmOffset];
-
- // Run length is stored in the next element of the array
- _runStartOffset += delta + _curRunLen;
- _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1];
- _nextBmOffset += 2;
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ *
+ */
+public final class BitmapDecoderRLE implements Iterator<Integer>
+{
+ // pointer to the compressed bitmap
+ private int _bmOff;
+ private int _bmLen;
+ private char[] _bmPtr;
+
+ // The offset of the <b>next</b> element we will read from the bitmap, or
+ // bmPtr.length if we are done.
+ private int _nextBmOffset;
+
+ // The offset in the matrix column of the beginning of the current run
+ private int _runStartOffset;
+
+ // The length of the current run
+ private int _curRunLen;
+
+ // The number of bits that we have returned from the current run.
+ private int _runBitsReturned;
+
+ /**
+ * Point this object at the beginning of a particular bitmap. After a call
+ * to this method, the next call to {@link #nextOffset()} will return the
+ * offset of the first bit in the specified bitmap.
+ *
+ * @param bmPtr
+ * pointer to a compressed bitmap
+ */
+ public BitmapDecoderRLE(char[] bmPtr, int off, int len) {
+ _bmOff = off;
+ _bmLen = len;
+ _bmPtr = bmPtr;
+ _nextBmOffset = 0;
+ _runStartOffset = 0;
+ _curRunLen = 0;
+ _runBitsReturned = 0;
+
+ if (0 == _bmLen) {
+ return; //no runs
+ }
+
+ // Advance to the beginning of the first non-empty run.
+ advanceToNextRun();
+ }
+
+ @Override
+ public Integer next() {
+ if( !hasNext() )
+ throw new RuntimeException("No next offset existing.");
+
+ // Grab the lookahead value
+ int ret = _runStartOffset + _runBitsReturned;
+
+ _runBitsReturned++;
+
+ // Check for end of run
+ if (_runBitsReturned == _curRunLen) {
+ advanceToNextRun();
+ }
+
+ return ret;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return _runBitsReturned < _curRunLen;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented for BitmapDecoderRLE.");
+ }
+
+ /** Move forward to the next non-empty run. */
+ private void advanceToNextRun() {
+ // While loop needed because some runs are of length 0
+ while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) {
+
+ _runBitsReturned = 0;
+
+ // Read the distance to the next run
+ char delta = _bmPtr[_bmOff + _nextBmOffset];
+
+ // Run length is stored in the next element of the array
+ _runStartOffset += delta + _curRunLen;
+ _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1];
+ _nextBmOffset += 2;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
index 8a535e1..75ddf9a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
@@ -1,392 +1,392 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.compress.utils.DblArray;
-import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
-import org.apache.sysml.runtime.compress.utils.IntArrayList;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.SparseBlock;
-
-
-/**
- * Static functions for encoding bitmaps in various ways.
- *
- */
-public class BitmapEncoder
-{
- /** Size of the blocks used in a blocked bitmap representation. */
- public static final int BITMAP_BLOCK_SZ = 65536;
-
- /**
- * Generate uncompressed bitmaps for a set of columns in an uncompressed
- * matrix block.
- *
- * @param colIndices
- * indexes (within the block) of the columns to extract
- * @param rawblock
- * an uncompressed matrix block; can be dense or sparse
- * @return uncompressed bitmap representation of the columns
- * @throws DMLRuntimeException
- */
- public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock)
- {
- //note: no sparse column selection reader because low potential
- //single column selection
- if( colIndices.length==1 ) {
- return extractBitmap(colIndices[0], rawblock,
- !CompressedMatrixBlock.MATERIALIZE_ZEROS);
- }
- //multiple column selection (general case)
- else {
- ReaderColumnSelection reader = null;
- if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT )
- reader = new ReaderColumnSelectionSparse(rawblock, colIndices,
- !CompressedMatrixBlock.MATERIALIZE_ZEROS);
- else
- reader = new ReaderColumnSelectionDense(rawblock, colIndices,
- !CompressedMatrixBlock.MATERIALIZE_ZEROS);
-
- return extractBitmap(colIndices, rawblock, reader);
- }
- }
-
- /**
- *
- * @param colIndices
- * @param rawblock
- * @param sampleIndexes
- * @return
- */
- public static UncompressedBitmap extractBitmapFromSample(int[] colIndices,
- MatrixBlock rawblock, int[] sampleIndexes)
- {
- //note: no sparse column selection reader because low potential
-
- //single column selection
- if( colIndices.length==1 ) {
- return extractBitmap(colIndices[0], rawblock, sampleIndexes,
- !CompressedMatrixBlock.MATERIALIZE_ZEROS);
- }
- //multiple column selection (general case)
- else {
- return extractBitmap(colIndices, rawblock,
- new ReaderColumnSelectionDenseSample(rawblock, colIndices,
- sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS));
- }
- }
-
- /**
- * Encodes the bitmap as a series of run lengths and offsets.
- * <p>
- * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE}
- * !</b>
- *
- * @param offsets
- * uncompressed contents of the bitmap, expressed as a list of
- * the offsets of different bits
- * @return compressed version of said bitmap
- */
- public static char[] genRLEBitmap(int[] offsets) {
- if( offsets.length == 0 )
- return new char[0]; //empty list
-
- // Use an ArrayList for correctness at the expense of temp space
- ArrayList<Character> buf = new ArrayList<Character>();
-
- // 1 + (position of last 1 in the previous run of 1's)
- // We add 1 because runs may be of length zero.
- int lastRunEnd = 0;
-
- // Offset between the end of the previous run of 1's and the first 1 in
- // the current run. Initialized below.
- int curRunOff;
-
- // Length of the most recent run of 1's
- int curRunLen = 0;
-
- // Current encoding is as follows:
- // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
- // Positive entry: Entry encodes offset to next run of 1's. The next
- // entry in the bitmap holds a run length.
-
- // Special-case the first run to simplify the loop below.
- int firstOff = offsets[0];
-
- // The first run may start more than a short's worth of bits in
- while (firstOff > Character.MAX_VALUE) {
- buf.add(Character.MAX_VALUE);
- buf.add((char) 0);
- firstOff -= Character.MAX_VALUE;
- lastRunEnd += Character.MAX_VALUE;
- }
-
- // Create the first run with an initial size of 1
- curRunOff = firstOff;
- curRunLen = 1;
-
- // Process the remaining offsets
- for (int i = 1; i < offsets.length; i++) {
-
- int absOffset = offsets[i];
-
- // 1 + (last position in run)
- int curRunEnd = lastRunEnd + curRunOff + curRunLen;
-
- if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
- // End of a run, either because we hit a run of 0's or because the
- // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
- buf.add((char) curRunOff);
- buf.add((char) curRunLen);
-
- lastRunEnd = curRunEnd;
- curRunOff = absOffset - lastRunEnd;
-
- while (curRunOff > Character.MAX_VALUE) {
- // SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
- // Add zero-length runs until the offset is small enough.
- buf.add(Character.MAX_VALUE);
- buf.add((char) 0);
- lastRunEnd += Character.MAX_VALUE;
- curRunOff -= Character.MAX_VALUE;
- }
-
- curRunLen = 1;
- } else {
- // Middle of a run
- curRunLen++;
- }
- }
-
- // Close out the last run
- if (curRunLen >= 1) {
- buf.add((char) curRunOff);
- buf.add((char) curRunLen);
- }
-
- // Convert wasteful ArrayList to packed array.
- char[] ret = new char[buf.size()];
- for (int i = 0; i < buf.size(); i++) {
- ret[i] = buf.get(i);
- }
- return ret;
- }
-
- /**
- * Encodes the bitmap in blocks of offsets. Within each block, the bits are
- * stored as absolute offsets from the start of the block.
- *
- * @param offsets
- * uncompressed contents of the bitmap, expressed as a list of
- * the offsets of different bits
- * @return compressed version of said bitmap
- */
- public static char[] genOffsetBitmap(int[] offsets)
- {
- int lastOffset = offsets[offsets.length - 1];
-
- // Build up the blocks
- int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
- // To simplify the logic, we make two passes.
- // The first pass divides the offsets by block.
- int[] blockLengths = new int[numBlocks];
- Arrays.fill(blockLengths, 0);
-
- for (int ix = 0; ix < offsets.length; ix++) {
- int val = offsets[ix];
- int blockForVal = val / BITMAP_BLOCK_SZ;
-
- blockLengths[blockForVal]++;
- }
-
- // The second pass creates the blocks.
- int totalSize = numBlocks;
- for (int block = 0; block < numBlocks; block++) {
- totalSize += blockLengths[block];
- }
- char[] encodedBlocks = new char[totalSize];
-
- int inputIx = 0;
- int blockStartIx = 0;
- for (int block = 0; block < numBlocks; block++) {
- int blockSz = blockLengths[block];
-
- // First entry in the block is number of bits
- encodedBlocks[blockStartIx] = (char) blockSz;
-
- for (int i = 0; i < blockSz; i++) {
- encodedBlocks[blockStartIx + i + 1] = (char)
- (offsets[inputIx+i] % BITMAP_BLOCK_SZ);
- }
-
- inputIx += blockSz;
- blockStartIx += blockSz + 1;
- }
-
- return encodedBlocks;
- }
-
- /**
- *
- * @param colIndex
- * @param rawblock
- * @return
- * @throws DMLRuntimeException
- */
- private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros)
- {
- //probe map for distinct items (for value or value groups)
- DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
-
- //scan rows and probe/build distinct items
- final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ?
- rawblock.getNumColumns():rawblock.getNumRows();
-
- if( rawblock.isInSparseFormat() //SPARSE
- && CompressedMatrixBlock.TRANSPOSE_INPUT )
- {
- SparseBlock a = rawblock.getSparseBlock();
- if( a != null && !a.isEmpty(colIndex) )
- {
- int apos = a.pos(colIndex);
- int alen = a.size(colIndex);
- int[] aix = a.indexes(colIndex);
- double[] avals = a.values(colIndex);
-
- IntArrayList lstPtr0 = new IntArrayList(); //for 0 values
- int last = -1;
- //iterate over non-zero entries but fill in zeros
- for( int j=apos; j<apos+alen; j++ )
- {
- //fill in zero values
- if( !skipZeros )
- for( int k=last+1; k<aix[j]; k++ )
- lstPtr0.appendValue(k);
- //handle non-zero value
- IntArrayList lstPtr = distinctVals.get(avals[j]);
- if( lstPtr == null ) {
- lstPtr = new IntArrayList();
- distinctVals.appendValue(avals[j], lstPtr);
- }
- lstPtr.appendValue(aix[j]);
- last = aix[j];
- }
- //fill in remaining zero values
- if( !skipZeros ) {
- for( int k=last+1; k<m; k++ )
- lstPtr0.appendValue(k);
- if( lstPtr0.size()>0 )
- distinctVals.appendValue(0, lstPtr0);
- }
- }
- else if( !skipZeros ) { //full 0 column
- IntArrayList lstPtr = new IntArrayList();
- for( int i=0; i<m; i++ )
- lstPtr.appendValue(i);
- distinctVals.appendValue(0, lstPtr);
- }
- }
- else //GENERAL CASE
- {
- for( int i=0; i<m; i++ ) {
- double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
- rawblock.quickGetValue(colIndex, i):
- rawblock.quickGetValue(i, colIndex);
- if( val!=0 || !skipZeros ) {
- IntArrayList lstPtr = distinctVals.get(val);
- if( lstPtr == null ) {
- lstPtr = new IntArrayList();
- distinctVals.appendValue(val, lstPtr);
- }
- lstPtr.appendValue(i);
- }
- }
- }
-
- return new UncompressedBitmap(distinctVals);
- }
-
- /**
- *
- * @param colIndex
- * @param rawblock
- * @param sampleIndexes
- * @return
- */
- private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros)
- {
- //note: general case only because anyway binary search for small samples
-
- //probe map for distinct items (for value or value groups)
- DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
-
- //scan rows and probe/build distinct items
- final int m = sampleIndexes.length;
- for( int i=0; i<m; i++ ) {
- int rowIndex = sampleIndexes[i];
- double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
- rawblock.quickGetValue(colIndex, rowIndex) :
- rawblock.quickGetValue(rowIndex, colIndex);
- if( val!=0 || !skipZeros ) {
- IntArrayList lstPtr = distinctVals.get(val);
- if( lstPtr == null ) {
- lstPtr = new IntArrayList();
- distinctVals.appendValue(val, lstPtr);
- }
- lstPtr.appendValue(i);
- }
- }
-
- return new UncompressedBitmap(distinctVals);
- }
-
- /**
- *
- * @param colIndices
- * @param rawblock
- * @param rowReader
- * @return
- */
- private static UncompressedBitmap extractBitmap(int[] colIndices,
- MatrixBlock rawblock, ReaderColumnSelection rowReader)
- {
- //probe map for distinct items (for value or value groups)
- DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
-
- //scan rows and probe/build distinct items
- DblArray cellVals = null;
- while ((cellVals = rowReader.nextRow()) != null) {
- IntArrayList lstPtr = distinctVals.get(cellVals);
- if (lstPtr == null) {
- //create new objects only on demand
- lstPtr = new IntArrayList();
- distinctVals.appendValue(new DblArray(cellVals), lstPtr);
- }
- lstPtr.appendValue(rowReader.getCurrentRowIndex());
- }
-
- return new UncompressedBitmap(distinctVals, colIndices.length);
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.IntArrayList;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+
+/**
+ * Static functions for encoding bitmaps in various ways.
+ *
+ */
+public class BitmapEncoder
+{
+ /** Size of the blocks used in a blocked bitmap representation. */
+ public static final int BITMAP_BLOCK_SZ = 65536;
+
+ /**
+ * Generate uncompressed bitmaps for a set of columns in an uncompressed
+ * matrix block.
+ *
+ * @param colIndices
+ * indexes (within the block) of the columns to extract
+ * @param rawblock
+ * an uncompressed matrix block; can be dense or sparse
+ * @return uncompressed bitmap representation of the columns
+ * @throws DMLRuntimeException
+ */
+ public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock)
+ {
+ //note: no sparse column selection reader because low potential
+ //single column selection
+ if( colIndices.length==1 ) {
+ return extractBitmap(colIndices[0], rawblock,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ }
+ //multiple column selection (general case)
+ else {
+ ReaderColumnSelection reader = null;
+ if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT )
+ reader = new ReaderColumnSelectionSparse(rawblock, colIndices,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ else
+ reader = new ReaderColumnSelectionDense(rawblock, colIndices,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+
+ return extractBitmap(colIndices, rawblock, reader);
+ }
+ }
+
+ /**
+ *
+ * @param colIndices
+ * @param rawblock
+ * @param sampleIndexes
+ * @return
+ */
+ public static UncompressedBitmap extractBitmapFromSample(int[] colIndices,
+ MatrixBlock rawblock, int[] sampleIndexes)
+ {
+ //note: no sparse column selection reader because low potential
+
+ //single column selection
+ if( colIndices.length==1 ) {
+ return extractBitmap(colIndices[0], rawblock, sampleIndexes,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ }
+ //multiple column selection (general case)
+ else {
+ return extractBitmap(colIndices, rawblock,
+ new ReaderColumnSelectionDenseSample(rawblock, colIndices,
+ sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS));
+ }
+ }
+
+ /**
+ * Encodes the bitmap as a series of run lengths and offsets.
+ * <p>
+ * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE}
+ * !</b>
+ *
+ * @param offsets
+ * uncompressed contents of the bitmap, expressed as a list of
+ * the offsets of different bits
+ * @return compressed version of said bitmap
+ */
+ public static char[] genRLEBitmap(int[] offsets) {
+ if( offsets.length == 0 )
+ return new char[0]; //empty list
+
+ // Use an ArrayList for correctness at the expense of temp space
+ ArrayList<Character> buf = new ArrayList<Character>();
+
+ // 1 + (position of last 1 in the previous run of 1's)
+ // We add 1 because runs may be of length zero.
+ int lastRunEnd = 0;
+
+ // Offset between the end of the previous run of 1's and the first 1 in
+ // the current run. Initialized below.
+ int curRunOff;
+
+ // Length of the most recent run of 1's
+ int curRunLen = 0;
+
+ // Current encoding is as follows:
+ // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
+ // Positive entry: Entry encodes offset to next run of 1's. The next
+ // entry in the bitmap holds a run length.
+
+ // Special-case the first run to simplify the loop below.
+ int firstOff = offsets[0];
+
+ // The first run may start more than a short's worth of bits in
+ while (firstOff > Character.MAX_VALUE) {
+ buf.add(Character.MAX_VALUE);
+ buf.add((char) 0);
+ firstOff -= Character.MAX_VALUE;
+ lastRunEnd += Character.MAX_VALUE;
+ }
+
+ // Create the first run with an initial size of 1
+ curRunOff = firstOff;
+ curRunLen = 1;
+
+ // Process the remaining offsets
+ for (int i = 1; i < offsets.length; i++) {
+
+ int absOffset = offsets[i];
+
+ // 1 + (last position in run)
+ int curRunEnd = lastRunEnd + curRunOff + curRunLen;
+
+ if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
+ // End of a run, either because we hit a run of 0's or because the
+ // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
+ buf.add((char) curRunOff);
+ buf.add((char) curRunLen);
+
+ lastRunEnd = curRunEnd;
+ curRunOff = absOffset - lastRunEnd;
+
+ while (curRunOff > Character.MAX_VALUE) {
+ // SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
+ // Add zero-length runs until the offset is small enough.
+ buf.add(Character.MAX_VALUE);
+ buf.add((char) 0);
+ lastRunEnd += Character.MAX_VALUE;
+ curRunOff -= Character.MAX_VALUE;
+ }
+
+ curRunLen = 1;
+ } else {
+ // Middle of a run
+ curRunLen++;
+ }
+ }
+
+ // Close out the last run
+ if (curRunLen >= 1) {
+ buf.add((char) curRunOff);
+ buf.add((char) curRunLen);
+ }
+
+ // Convert wasteful ArrayList to packed array.
+ char[] ret = new char[buf.size()];
+ for (int i = 0; i < buf.size(); i++) {
+ ret[i] = buf.get(i);
+ }
+ return ret;
+ }
+
+ /**
+ * Encodes the bitmap in blocks of offsets. Within each block, the bits are
+ * stored as absolute offsets from the start of the block.
+ *
+ * @param offsets
+ * uncompressed contents of the bitmap, expressed as a list of
+ * the offsets of different bits
+ * @return compressed version of said bitmap
+ */
+ public static char[] genOffsetBitmap(int[] offsets)
+ {
+ int lastOffset = offsets[offsets.length - 1];
+
+ // Build up the blocks
+ int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
+ // To simplify the logic, we make two passes.
+ // The first pass divides the offsets by block.
+ int[] blockLengths = new int[numBlocks];
+ Arrays.fill(blockLengths, 0);
+
+ for (int ix = 0; ix < offsets.length; ix++) {
+ int val = offsets[ix];
+ int blockForVal = val / BITMAP_BLOCK_SZ;
+
+ blockLengths[blockForVal]++;
+ }
+
+ // The second pass creates the blocks.
+ int totalSize = numBlocks;
+ for (int block = 0; block < numBlocks; block++) {
+ totalSize += blockLengths[block];
+ }
+ char[] encodedBlocks = new char[totalSize];
+
+ int inputIx = 0;
+ int blockStartIx = 0;
+ for (int block = 0; block < numBlocks; block++) {
+ int blockSz = blockLengths[block];
+
+ // First entry in the block is number of bits
+ encodedBlocks[blockStartIx] = (char) blockSz;
+
+ for (int i = 0; i < blockSz; i++) {
+ encodedBlocks[blockStartIx + i + 1] = (char)
+ (offsets[inputIx+i] % BITMAP_BLOCK_SZ);
+ }
+
+ inputIx += blockSz;
+ blockStartIx += blockSz + 1;
+ }
+
+ return encodedBlocks;
+ }
+
+ /**
+ *
+ * @param colIndex
+ * @param rawblock
+ * @return
+ * @throws DMLRuntimeException
+ */
+ private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros)
+ {
+ //probe map for distinct items (for value or value groups)
+ DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.getNumColumns():rawblock.getNumRows();
+
+ if( rawblock.isInSparseFormat() //SPARSE
+ && CompressedMatrixBlock.TRANSPOSE_INPUT )
+ {
+ SparseBlock a = rawblock.getSparseBlock();
+ if( a != null && !a.isEmpty(colIndex) )
+ {
+ int apos = a.pos(colIndex);
+ int alen = a.size(colIndex);
+ int[] aix = a.indexes(colIndex);
+ double[] avals = a.values(colIndex);
+
+ IntArrayList lstPtr0 = new IntArrayList(); //for 0 values
+ int last = -1;
+ //iterate over non-zero entries but fill in zeros
+ for( int j=apos; j<apos+alen; j++ )
+ {
+ //fill in zero values
+ if( !skipZeros )
+ for( int k=last+1; k<aix[j]; k++ )
+ lstPtr0.appendValue(k);
+ //handle non-zero value
+ IntArrayList lstPtr = distinctVals.get(avals[j]);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(avals[j], lstPtr);
+ }
+ lstPtr.appendValue(aix[j]);
+ last = aix[j];
+ }
+ //fill in remaining zero values
+ if( !skipZeros ) {
+ for( int k=last+1; k<m; k++ )
+ lstPtr0.appendValue(k);
+ if( lstPtr0.size()>0 )
+ distinctVals.appendValue(0, lstPtr0);
+ }
+ }
+ else if( !skipZeros ) { //full 0 column
+ IntArrayList lstPtr = new IntArrayList();
+ for( int i=0; i<m; i++ )
+ lstPtr.appendValue(i);
+ distinctVals.appendValue(0, lstPtr);
+ }
+ }
+ else //GENERAL CASE
+ {
+ for( int i=0; i<m; i++ ) {
+ double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.quickGetValue(colIndex, i):
+ rawblock.quickGetValue(i, colIndex);
+ if( val!=0 || !skipZeros ) {
+ IntArrayList lstPtr = distinctVals.get(val);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(val, lstPtr);
+ }
+ lstPtr.appendValue(i);
+ }
+ }
+ }
+
+ return new UncompressedBitmap(distinctVals);
+ }
+
+ /**
+ *
+ * @param colIndex
+ * @param rawblock
+ * @param sampleIndexes
+ * @return
+ */
+ private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros)
+ {
+ //note: general case only because anyway binary search for small samples
+
+ //probe map for distinct items (for value or value groups)
+ DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ final int m = sampleIndexes.length;
+ for( int i=0; i<m; i++ ) {
+ int rowIndex = sampleIndexes[i];
+ double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.quickGetValue(colIndex, rowIndex) :
+ rawblock.quickGetValue(rowIndex, colIndex);
+ if( val!=0 || !skipZeros ) {
+ IntArrayList lstPtr = distinctVals.get(val);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(val, lstPtr);
+ }
+ lstPtr.appendValue(i);
+ }
+ }
+
+ return new UncompressedBitmap(distinctVals);
+ }
+
+ /**
+ *
+ * @param colIndices
+ * @param rawblock
+ * @param rowReader
+ * @return
+ */
+ private static UncompressedBitmap extractBitmap(int[] colIndices,
+ MatrixBlock rawblock, ReaderColumnSelection rowReader)
+ {
+ //probe map for distinct items (for value or value groups)
+ DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ DblArray cellVals = null;
+ while ((cellVals = rowReader.nextRow()) != null) {
+ IntArrayList lstPtr = distinctVals.get(cellVals);
+ if (lstPtr == null) {
+ //create new objects only on demand
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(new DblArray(cellVals), lstPtr);
+ }
+ lstPtr.appendValue(rowReader.getCurrentRowIndex());
+ }
+
+ return new UncompressedBitmap(distinctVals, colIndices.length);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
index 647a701..b1072d1 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -1,259 +1,259 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
-import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
-
-/**
- * Class that stores information about a column group within a compressed matrix
- * block. There are subclasses specific to each compression type.
- *
- */
-public abstract class ColGroup implements Serializable
-{
- private static final long serialVersionUID = 2439785418908671481L;
-
- public enum CompressionType {
- UNCOMPRESSED, //uncompressed sparse/dense
- RLE_BITMAP, //RLE bitmap
- OLE_BITMAP; //OLE bitmap
- }
-
- /**
- * Offsets of the columns that make up the column group. Zero-based, and
- * relative to the matrix block.
- */
- protected int[] _colIndexes;
-
- /** Number of rows in the matrix, for use by child classes. */
- protected int _numRows;
-
- /** How the elements of the column group are compressed. */
- private CompressionType _compType;
-
-
- /**
- * Main constructor.
- *
- * @param colIndices
- * offsets of the columns in the matrix block that make up the
- * group
- * @param numRows
- * total number of rows in the parent block
- */
- protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
- _compType = type;
- _colIndexes = colIndices;
- _numRows = numRows;
- }
-
- /** Convenience constructor for converting indices to a more compact format. */
- protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
- _compType = type;
- _colIndexes = new int[colIndicesList.size()];
- int i = 0;
- for (Integer index : colIndicesList)
- _colIndexes[i++] = index;
- }
-
- /**
- * @return offsets of the columns in the matrix block that make up the group
- */
- public int[] getColIndices() {
- return _colIndexes;
- }
-
- /**
- * @param col
- * an index from 0 to the number of columns in this group - 1
- * @return offset of the specified column in the matrix block that make up
- * the group
- */
- public int getColIndex(int colNum) {
- return _colIndexes[colNum];
- }
-
- public int getNumRows() {
- return _numRows;
- }
-
- /**
- * @return number of columns in this column group
- */
- public int getNumCols() {
- return _colIndexes.length;
- }
-
- /**
- * @return How the elements of the column group are compressed.
- */
- public CompressionType getCompType() {
- return _compType;
- }
-
- /**
- *
- * @param offset
- */
- public void shiftColIndices(int offset) {
- for( int i=0; i<_colIndexes.length; i++ )
- _colIndexes[i] += offset;
- }
-
- /**
- * Note: Must be overridden by child classes to account for additional data
- * and metadata
- *
- * @return an upper bound on the number of bytes used to store this ColGroup
- * in memory.
- */
- public long estimateInMemorySize() {
- // int numRows (4B) , array reference colIndices (8B) + array object
- // overhead if exists (32B) + 4B per element, CompressionType compType
- // (2 booleans 2B + enum overhead 32B + reference to enum 8B)
- long size = 54;
- if (_colIndexes == null)
- return size;
- else
- return size + 32 + 4 * _colIndexes.length;
- }
-
- /**
- * Decompress the contents of this column group into the specified full
- * matrix block.
- *
- * @param target
- * a matrix block where the columns covered by this column group
- * have not yet been filled in.
- */
- public abstract void decompressToBlock(MatrixBlock target);
-
- /**
- * Decompress the contents of this column group into uncompressed packed
- * columns
- *
- * @param target
- * a dense matrix block. The block must have enough space to hold
- * the contents of this column group.
- * @param colIndexTargets
- * array that maps column indices in the original matrix block to
- * columns of target.
- */
- public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
-
- /**
- *
- * @param target dense output vector
- * @param colpos column to decompress, error if larger or equal numCols
- */
- public abstract void decompressToBlock(MatrixBlock target, int colpos);
-
-
- /**
- * Serializes column group to data output.
- *
- * @param out
- * @throws IOException
- */
- public abstract void write(DataOutput out)
- throws IOException;
-
- /**
- * Deserializes column group from data input.
- *
- * @param in
- * @throws IOException
- */
- public abstract void readFields(DataInput in)
- throws IOException;
-
-
- /**
- * Returns the exact serialized size of column group.
- * This can be used for example for buffer preallocation.
- *
- * @return
- */
- public abstract long getExactSizeOnDisk();
-
- /**
- * Multiply the slice of the matrix that this column group represents by a
- * vector on the right.
- *
- * @param vector
- * vector to multiply by (tall vector)
- * @param result
- * accumulator for holding the result
- * @throws DMLRuntimeException
- * if the internal SystemML code that performs the
- * multiplication experiences an error
- */
- public abstract void rightMultByVector(MatrixBlock vector,
- MatrixBlock result, int rl, int ru) throws DMLRuntimeException;
-
-
- /**
- * Multiply the slice of the matrix that this column group represents by a
- * row vector on the left (the original column vector is assumed to be
- * transposed already i.e. its size now is 1xn).
- *
- * @param vector
- * @param result
- * @throws DMLRuntimeException
- */
- public abstract void leftMultByRowVector(MatrixBlock vector,
- MatrixBlock result) throws DMLRuntimeException;
-
- /**
- * Perform the specified scalar operation directly on the compressed column
- * group, without decompressing individual cells if possible.
- *
- * @param op
- * operation to perform
- * @return version of this column group with the operation applied
- */
- public abstract ColGroup scalarOperation(ScalarOperator op)
- throws DMLRuntimeException;
-
- /**
- *
- * @param op
- * @param result
- * @throws DMLUnsupportedOperationException
- * @throws DMLRuntimeException
- */
- public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
- throws DMLRuntimeException;
-
- /**
- *
- * @param rnnz
- */
- protected abstract void countNonZerosPerRow(int[] rnnz);
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class that stores information about a column group within a compressed matrix
+ * block. There are subclasses specific to each compression type.
+ *
+ */
+public abstract class ColGroup implements Serializable
+{
+ private static final long serialVersionUID = 2439785418908671481L;
+
+ public enum CompressionType {
+ UNCOMPRESSED, //uncompressed sparse/dense
+ RLE_BITMAP, //RLE bitmap
+ OLE_BITMAP; //OLE bitmap
+ }
+
+ /**
+ * Offsets of the columns that make up the column group. Zero-based, and
+ * relative to the matrix block.
+ */
+ protected int[] _colIndexes;
+
+ /** Number of rows in the matrix, for use by child classes. */
+ protected int _numRows;
+
+ /** How the elements of the column group are compressed. */
+ private CompressionType _compType;
+
+
+ /**
+ * Main constructor.
+ *
+ * @param colIndices
+ * offsets of the columns in the matrix block that make up the
+ * group
+ * @param numRows
+ * total number of rows in the parent block
+ */
+ protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
+ _compType = type;
+ _colIndexes = colIndices;
+ _numRows = numRows;
+ }
+
+ /** Convenience constructor for converting indices to a more compact format. */
+ protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
+ _compType = type;
+ _colIndexes = new int[colIndicesList.size()];
+ int i = 0;
+ for (Integer index : colIndicesList)
+ _colIndexes[i++] = index;
+ }
+
+ /**
+ * @return offsets of the columns in the matrix block that make up the group
+ */
+ public int[] getColIndices() {
+ return _colIndexes;
+ }
+
+ /**
+ * @param col
+ * an index from 0 to the number of columns in this group - 1
+ * @return offset of the specified column in the matrix block that make up
+ * the group
+ */
+ public int getColIndex(int colNum) {
+ return _colIndexes[colNum];
+ }
+
+ public int getNumRows() {
+ return _numRows;
+ }
+
+ /**
+ * @return number of columns in this column group
+ */
+ public int getNumCols() {
+ return _colIndexes.length;
+ }
+
+ /**
+ * @return How the elements of the column group are compressed.
+ */
+ public CompressionType getCompType() {
+ return _compType;
+ }
+
+ /**
+ *
+ * @param offset
+ */
+ public void shiftColIndices(int offset) {
+ for( int i=0; i<_colIndexes.length; i++ )
+ _colIndexes[i] += offset;
+ }
+
+ /**
+ * Note: Must be overridden by child classes to account for additional data
+ * and metadata
+ *
+ * @return an upper bound on the number of bytes used to store this ColGroup
+ * in memory.
+ */
+ public long estimateInMemorySize() {
+ // int numRows (4B) , array reference colIndices (8B) + array object
+ // overhead if exists (32B) + 4B per element, CompressionType compType
+ // (2 booleans 2B + enum overhead 32B + reference to enum 8B)
+ long size = 54;
+ if (_colIndexes == null)
+ return size;
+ else
+ return size + 32 + 4 * _colIndexes.length;
+ }
+
+ /**
+ * Decompress the contents of this column group into the specified full
+ * matrix block.
+ *
+ * @param target
+ * a matrix block where the columns covered by this column group
+ * have not yet been filled in.
+ */
+ public abstract void decompressToBlock(MatrixBlock target);
+
+ /**
+ * Decompress the contents of this column group into uncompressed packed
+ * columns
+ *
+ * @param target
+ * a dense matrix block. The block must have enough space to hold
+ * the contents of this column group.
+ * @param colIndexTargets
+ * array that maps column indices in the original matrix block to
+ * columns of target.
+ */
+ public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
+
+ /**
+ *
+ * @param target dense output vector
+ * @param colpos column to decompress, error if larger or equal numCols
+ */
+ public abstract void decompressToBlock(MatrixBlock target, int colpos);
+
+
+ /**
+ * Serializes column group to data output.
+ *
+ * @param out
+ * @throws IOException
+ */
+ public abstract void write(DataOutput out)
+ throws IOException;
+
+ /**
+ * Deserializes column group from data input.
+ *
+ * @param in
+ * @throws IOException
+ */
+ public abstract void readFields(DataInput in)
+ throws IOException;
+
+
+ /**
+ * Returns the exact serialized size of column group.
+ * This can be used for example for buffer preallocation.
+ *
+ * @return
+ */
+ public abstract long getExactSizeOnDisk();
+
+ /**
+ * Multiply the slice of the matrix that this column group represents by a
+ * vector on the right.
+ *
+ * @param vector
+ * vector to multiply by (tall vector)
+ * @param result
+ * accumulator for holding the result
+ * @throws DMLRuntimeException
+ * if the internal SystemML code that performs the
+ * multiplication experiences an error
+ */
+ public abstract void rightMultByVector(MatrixBlock vector,
+ MatrixBlock result, int rl, int ru) throws DMLRuntimeException;
+
+
+ /**
+ * Multiply the slice of the matrix that this column group represents by a
+ * row vector on the left (the original column vector is assumed to be
+ * transposed already i.e. its size now is 1xn).
+ *
+ * @param vector
+ * @param result
+ * @throws DMLRuntimeException
+ */
+ public abstract void leftMultByRowVector(MatrixBlock vector,
+ MatrixBlock result) throws DMLRuntimeException;
+
+ /**
+ * Perform the specified scalar operation directly on the compressed column
+ * group, without decompressing individual cells if possible.
+ *
+ * @param op
+ * operation to perform
+ * @return version of this column group with the operation applied
+ */
+ public abstract ColGroup scalarOperation(ScalarOperator op)
+ throws DMLRuntimeException;
+
+ /**
+ *
+ * @param op
+ * @param result
+ * @throws DMLUnsupportedOperationException
+ * @throws DMLRuntimeException
+ */
+ public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
+ throws DMLRuntimeException;
+
+ /**
+ *
+ * @param rnnz
+ */
+ protected abstract void countNonZerosPerRow(int[] rnnz);
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
index 5d60a49..3455cc7 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
@@ -1,498 +1,498 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.compress;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.TreeMap;
-import java.util.Map.Entry;
-
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
-
-
-/**
- * Base class for column groups encoded with various types of bitmap encoding.
- *
- *
- * NOTES:
- * * OLE: separate storage segment length and bitmaps led to a 30% improvement
- * but not applied because more difficult to support both data layouts at the
- * same time (distributed/local as well as w/ and w/o low-level opt)
- */
-public abstract class ColGroupBitmap extends ColGroup
-{
- private static final long serialVersionUID = -1635828933479403125L;
-
- public static final boolean LOW_LEVEL_OPT = true;
- //sorting of values by physical length helps by 10-20%, especially for serial, while
- //slight performance decrease for parallel incl multi-threaded, hence not applied for
- //distributed operations (also because compression time + garbage collection increases)
- private static final boolean SORT_VALUES_BY_LENGTH = true;
- protected static final boolean CREATE_SKIPLIST = true;
-
- protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
- protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-
- /** Distinct values associated with individual bitmaps. */
- protected double[] _values; //linearized <numcol vals> <numcol vals>
-
- /** Bitmaps, one per uncompressed value in {@link #values}. */
- protected int[] _ptr; //bitmap offsets per value
- protected char[] _data; //linearized bitmaps (variable length)
-
- protected int[] _skiplist;
-
- public ColGroupBitmap(CompressionType type) {
- super(type, (int[]) null, -1);
- }
-
- /**
- * Main constructor. Stores the headers for the individual bitmaps.
- *
- * @param colIndices
- * indices (within the block) of the columns included in this
- * column
- * @param numRows
- * total number of rows in the parent block
- * @param ubm
- * Uncompressed bitmap representation of the block
- */
- public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm)
- {
- super(type, colIndices, numRows);
-
- // Extract and store just the distinct values. The bitmaps themselves go
- // into the subclasses.
- final int numCols = ubm.getNumColumns();
- final int numVals = ubm.getNumValues();
-
- _values = new double[numVals*numCols];
- for (int i=0; i<numVals; i++) {
- //note: deep copied internally on getValues
- double[] tmp = ubm.getValues(i);
- System.arraycopy(tmp, 0, _values, i*numCols, numCols);
- }
- }
-
- /**
- * Constructor for subclass methods that need to create shallow copies
- *
- * @param colIndices
- * raw column index information
- * @param numRows
- * number of rows in the block
- * @param values
- * set of distinct values for the block (associated bitmaps are
- * kept in the subclass)
- */
- protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) {
- super(type, colIndices, numRows);
- _values = values;
- }
-
- protected final int len(int k) {
- return _ptr[k+1] - _ptr[k];
- }
-
- /**
- *
- * @param numVals
- * @param totalLen
- * @param lbitmaps
- */
- protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
- {
- // compact bitmaps to linearized representation
- if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
- && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
- {
- // sort value by num segments in descending order
- TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
- for( int i=0; i<numVals; i++ ) {
- int revlen = totalLen-lbitmaps[i].length;
- if( !tree.containsKey(revlen) )
- tree.put(revlen, new ArrayList<Integer>());
- tree.get(revlen).add(i);
- }
-
- // compact bitmaps to linearized representation
- _ptr = new int[numVals+1];
- _data = new char[totalLen];
- int pos = 0, off = 0;
- for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
- for( Integer tmpix : e.getValue() ) {
- int len = lbitmaps[tmpix].length;
- _ptr[pos] = off;
- System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
- off += len;
- pos++;
- }
- }
- _ptr[numVals] = totalLen;
-
- // reorder values
- double[] lvalues = new double[_values.length];
- int off2 = 0; int numCols = _colIndexes.length;
- for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
- for( Integer tmpix : e.getValue() ) {
- System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);
- off2 += numCols;
- }
- }
- _values = lvalues;
- }
- else
- {
- // compact bitmaps to linearized representation
- _ptr = new int[numVals+1];
- _data = new char[totalLen];
- for( int i=0, off=0; i<numVals; i++ ) {
- int len = lbitmaps[i].length;
- _ptr[i] = off;
- System.arraycopy(lbitmaps[i], 0, _data, off, len);
- off += len;
- }
- _ptr[numVals] = totalLen;
- }
- }
-
- @Override
- public long estimateInMemorySize() {
- long size = super.estimateInMemorySize();
-
- // adding the size of values
- size += 8; //array reference
- if (_values != null) {
- size += 32 + _values.length * 8; //values
- }
-
- // adding bitmaps size
- size += 16; //array references
- if (_data != null) {
- size += 32 + _ptr.length * 4; // offsets
- size += 32 + _data.length * 2; // bitmaps
- }
-
- return size;
- }
-
- //generic decompression for OLE/RLE, to be overwritten for performance
- @Override
- public void decompressToBlock(MatrixBlock target)
- {
- final int numCols = getNumCols();
- final int numVals = getNumValues();
- int[] colIndices = getColIndices();
-
- // Run through the bitmaps for this column group
- for (int i = 0; i < numVals; i++) {
- Iterator<Integer> decoder = getDecodeIterator(i);
- int valOff = i*numCols;
-
- while (decoder.hasNext()) {
- int row = decoder.next();
- for (int colIx = 0; colIx < numCols; colIx++) {
- target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
- }
- }
- }
- }
-
- //generic decompression for OLE/RLE, to be overwritten for performance
- @Override
- public void decompressToBlock(MatrixBlock target, int[] colIndexTargets)
- {
- final int numCols = getNumCols();
- final int numVals = getNumValues();
-
- // Run through the bitmaps for this column group
- for (int i = 0; i < numVals; i++) {
- Iterator<Integer> decoder = getDecodeIterator(i);
- int valOff = i*numCols;
-
- while (decoder.hasNext()) {
- int row = decoder.next();
- for (int colIx = 0; colIx < numCols; colIx++) {
- int origMatrixColIx = getColIndex(colIx);
- int targetColIx = colIndexTargets[origMatrixColIx];
- target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
- }
- }
- }
- }
-
- //generic decompression for OLE/RLE, to be overwritten for performance
- @Override
- public void decompressToBlock(MatrixBlock target, int colpos)
- {
- final int numCols = getNumCols();
- final int numVals = getNumValues();
-
- // Run through the bitmaps for this column group
- for (int i = 0; i < numVals; i++) {
- Iterator<Integer> decoder = getDecodeIterator(i);
- int valOff = i*numCols;
-
- while (decoder.hasNext()) {
- int row = decoder.next();
- target.quickSetValue(row, 0, _values[valOff+colpos]);
- }
- }
- }
-
- /**
- *
- * @param bitmapIx
- * @return
- */
- protected double sumValues(int bitmapIx)
- {
- final int numCols = getNumCols();
- final int valOff = bitmapIx * numCols;
-
- double val = 0.0;
- for( int i = 0; i < numCols; i++ ) {
- val += _values[valOff+i];
- }
-
- return val;
- }
-
- protected double sumValues(int bitmapIx, double[] b)
- {
- final int numCols = getNumCols();
- final int valOff = bitmapIx * numCols;
-
- double val = 0;
- for( int i = 0; i < numCols; i++ ) {
- val += _values[valOff+i] * b[i];
- }
-
- return val;
- }
-
- /**
- *
- * @param b
- * @param c
- */
- protected void sumAllValues(double[] b, double[] c)
- {
- final int numVals = getNumValues();
- final int numCols = getNumCols();
-
- //vectMultiplyAdd over cols instead of dotProduct over vals because
- //usually more values than columns
- for( int i=0, off=0; i<numCols; i++, off+=numVals )
- LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
- }
-
- /**
- * Method for use by subclasses. Applies a scalar operation to the value
- * metadata stored in the superclass.
- *
- * @param op
- * scalar operation to perform
- * @return transformed copy of value metadata for this column group
- * @throws DMLRuntimeException
- */
- protected double[] applyScalarOp(ScalarOperator op)
- throws DMLRuntimeException
- {
- //scan over linearized values
- double[] ret = new double[_values.length];
- for (int i = 0; i < _values.length; i++) {
- ret[i] = op.executeScalar(_values[i]);
- }
-
- return ret;
- }
-
- /**
- *
- * @param op
- * @param newVal
- * @param numCols
- * @return
- * @throws DMLRuntimeException
- */
- protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
- throws DMLRuntimeException
- {
- //scan over linearized values
- double[] ret = new double[_values.length + numCols];
- for( int i = 0; i < _values.length; i++ ) {
- ret[i] = op.executeScalar(_values[i]);
- }
-
- //add new value to the end
- Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
-
- return ret;
- }
-
- /**
- * @return the number of distinct sets of values associated with the bitmaps
- * in this column group
- */
- public int getNumValues() {
- return _values.length / _colIndexes.length;
- }
-
- /**
- *
- * @return
- */
- public double[] getValues() {
- return _values;
- }
-
- /**
- *
- * @return
- */
- public char[] getBitmaps() {
- return _data;
- }
-
- public int[] getBitmapOffsets() {
- return _ptr;
- }
-
- /**
- * @param bmpIx
- * index of a specific compressed bitmap (stored in subclass,
- * index same as {@link #values})
- * @return an object for iterating over the row offsets in this bitmap. Only
- * valid until the next call to this method. May be reused across
- * calls.
- */
- public abstract Iterator<Integer> getDecodeIterator(int bmpIx);
-
- /**
- * Utility function of sparse-unsafe operations.
- *
- * @param ind
- * @return
- * @throws DMLRuntimeException
- */
- protected int[] computeOffsets(boolean[] ind)
- throws DMLRuntimeException
- {
- //determine number of offsets
- int numOffsets = 0;
- for( int i=0; i<ind.length; i++ )
- numOffsets += ind[i] ? 1 : 0;
-
- //create offset lists
- int[] ret = new int[numOffsets];
- for( int i=0, pos=0; i<ind.length; i++ )
- if( ind[i] )
- ret[pos++] = i;
-
- return ret;
- }
-
- @Override
- public void readFields(DataInput in)
- throws IOException
- {
- _numRows = in.readInt();
- int numCols = in.readInt();
- int numVals = in.readInt();
-
- //read col indices
- _colIndexes = new int[ numCols ];
- for( int i=0; i<numCols; i++ )
- _colIndexes[i] = in.readInt();
-
- //read distinct values
- _values = new double[numVals*numCols];
- for( int i=0; i<numVals*numCols; i++ )
- _values[i] = in.readDouble();
-
- //read bitmaps
- int totalLen = in.readInt();
- _ptr = new int[numVals+1];
- _data = new char[totalLen];
- for( int i=0, off=0; i<numVals; i++ ) {
- int len = in.readInt();
- _ptr[i] = off;
- for( int j=0; j<len; j++ )
- _data[off+j] = in.readChar();
- off += len;
- }
- _ptr[numVals] = totalLen;
- }
-
- @Override
- public void write(DataOutput out)
- throws IOException
- {
- int numCols = getNumCols();
- int numVals = getNumValues();
- out.writeInt(_numRows);
- out.writeInt(numCols);
- out.writeInt(numVals);
-
- //write col indices
- for( int i=0; i<_colIndexes.length; i++ )
- out.writeInt( _colIndexes[i] );
-
- //write distinct values
- for( int i=0; i<_values.length; i++ )
- out.writeDouble(_values[i]);
-
- //write bitmaps (lens and data, offset later recreated)
- int totalLen = 0;
- for( int i=0; i<numVals; i++ )
- totalLen += len(i);
- out.writeInt(totalLen);
- for( int i=0; i<numVals; i++ ) {
- int len = len(i);
- int off = _ptr[i];
- out.writeInt(len);
- for( int j=0; j<len; j++ )
- out.writeChar(_data[off+j]);
- }
- }
-
- @Override
- public long getExactSizeOnDisk() {
- long ret = 12; //header
- //col indices
- ret += 4 * _colIndexes.length;
- //distinct values (groups of values)
- ret += 8 * _values.length;
- //actual bitmaps
- ret += 4; //total length
- for( int i=0; i<getNumValues(); i++ )
- ret += 4 + 2 * len(i);
-
- return ret;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+
+/**
+ * Base class for column groups encoded with various types of bitmap encoding.
+ *
+ *
+ * NOTES:
+ * * OLE: separate storage segment length and bitmaps led to a 30% improvement
+ * but not applied because more difficult to support both data layouts at the
+ * same time (distributed/local as well as w/ and w/o low-level opt)
+ */
+public abstract class ColGroupBitmap extends ColGroup
+{
+ private static final long serialVersionUID = -1635828933479403125L;
+
+ public static final boolean LOW_LEVEL_OPT = true;
+ //sorting of values by physical length helps by 10-20%, especially for serial, while
+ //slight performance decrease for parallel incl multi-threaded, hence not applied for
+ //distributed operations (also because compression time + garbage collection increases)
+ private static final boolean SORT_VALUES_BY_LENGTH = true;
+ protected static final boolean CREATE_SKIPLIST = true;
+
+ protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+ protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+
+ /** Distinct values associated with individual bitmaps. */
+ protected double[] _values; //linearized <numcol vals> <numcol vals>
+
+ /** Bitmaps, one per uncompressed value in {@link #values}. */
+ protected int[] _ptr; //bitmap offsets per value
+ protected char[] _data; //linearized bitmaps (variable length)
+
+ protected int[] _skiplist;
+
+ public ColGroupBitmap(CompressionType type) {
+ super(type, (int[]) null, -1);
+ }
+
+ /**
+ * Main constructor. Stores the headers for the individual bitmaps.
+ *
+ * @param colIndices
+ * indices (within the block) of the columns included in this
+ * column
+ * @param numRows
+ * total number of rows in the parent block
+ * @param ubm
+ * Uncompressed bitmap representation of the block
+ */
+ public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm)
+ {
+ super(type, colIndices, numRows);
+
+ // Extract and store just the distinct values. The bitmaps themselves go
+ // into the subclasses.
+ final int numCols = ubm.getNumColumns();
+ final int numVals = ubm.getNumValues();
+
+ _values = new double[numVals*numCols];
+ for (int i=0; i<numVals; i++) {
+ //note: deep copied internally on getValues
+ double[] tmp = ubm.getValues(i);
+ System.arraycopy(tmp, 0, _values, i*numCols, numCols);
+ }
+ }
+
+ /**
+ * Constructor for subclass methods that need to create shallow copies
+ *
+ * @param colIndices
+ * raw column index information
+ * @param numRows
+ * number of rows in the block
+ * @param values
+ * set of distinct values for the block (associated bitmaps are
+ * kept in the subclass)
+ */
+ protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) {
+ super(type, colIndices, numRows);
+ _values = values;
+ }
+
+ protected final int len(int k) {
+ return _ptr[k+1] - _ptr[k];
+ }
+
+ /**
+ *
+ * @param numVals
+ * @param totalLen
+ * @param lbitmaps
+ */
+ protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
+ {
+ // compact bitmaps to linearized representation
+ if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
+ && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
+ {
+ // sort value by num segments in descending order
+ TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
+ for( int i=0; i<numVals; i++ ) {
+ int revlen = totalLen-lbitmaps[i].length;
+ if( !tree.containsKey(revlen) )
+ tree.put(revlen, new ArrayList<Integer>());
+ tree.get(revlen).add(i);
+ }
+
+ // compact bitmaps to linearized representation
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ int pos = 0, off = 0;
+ for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+ for( Integer tmpix : e.getValue() ) {
+ int len = lbitmaps[tmpix].length;
+ _ptr[pos] = off;
+ System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
+ off += len;
+ pos++;
+ }
+ }
+ _ptr[numVals] = totalLen;
+
+ // reorder values
+ double[] lvalues = new double[_values.length];
+ int off2 = 0; int numCols = _colIndexes.length;
+ for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+ for( Integer tmpix : e.getValue() ) {
+ System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);
+ off2 += numCols;
+ }
+ }
+ _values = lvalues;
+ }
+ else
+ {
+ // compact bitmaps to linearized representation
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ for( int i=0, off=0; i<numVals; i++ ) {
+ int len = lbitmaps[i].length;
+ _ptr[i] = off;
+ System.arraycopy(lbitmaps[i], 0, _data, off, len);
+ off += len;
+ }
+ _ptr[numVals] = totalLen;
+ }
+ }
+
+ @Override
+ public long estimateInMemorySize() {
+ long size = super.estimateInMemorySize();
+
+ // adding the size of values
+ size += 8; //array reference
+ if (_values != null) {
+ size += 32 + _values.length * 8; //values
+ }
+
+ // adding bitmaps size
+ size += 16; //array references
+ if (_data != null) {
+ size += 32 + _ptr.length * 4; // offsets
+ size += 32 + _data.length * 2; // bitmaps
+ }
+
+ return size;
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ int[] colIndices = getColIndices();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ for (int colIx = 0; colIx < numCols; colIx++) {
+ target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
+ }
+ }
+ }
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target, int[] colIndexTargets)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ for (int colIx = 0; colIx < numCols; colIx++) {
+ int origMatrixColIx = getColIndex(colIx);
+ int targetColIx = colIndexTargets[origMatrixColIx];
+ target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
+ }
+ }
+ }
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target, int colpos)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ target.quickSetValue(row, 0, _values[valOff+colpos]);
+ }
+ }
+ }
+
+ /**
+ *
+ * @param bitmapIx
+ * @return
+ */
+ protected double sumValues(int bitmapIx)
+ {
+ final int numCols = getNumCols();
+ final int valOff = bitmapIx * numCols;
+
+ double val = 0.0;
+ for( int i = 0; i < numCols; i++ ) {
+ val += _values[valOff+i];
+ }
+
+ return val;
+ }
+
+ protected double sumValues(int bitmapIx, double[] b)
+ {
+ final int numCols = getNumCols();
+ final int valOff = bitmapIx * numCols;
+
+ double val = 0;
+ for( int i = 0; i < numCols; i++ ) {
+ val += _values[valOff+i] * b[i];
+ }
+
+ return val;
+ }
+
+ /**
+ *
+ * @param b
+ * @param c
+ */
+ protected void sumAllValues(double[] b, double[] c)
+ {
+ final int numVals = getNumValues();
+ final int numCols = getNumCols();
+
+ //vectMultiplyAdd over cols instead of dotProduct over vals because
+ //usually more values than columns
+ for( int i=0, off=0; i<numCols; i++, off+=numVals )
+ LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
+ }
+
+ /**
+ * Method for use by subclasses. Applies a scalar operation to the value
+ * metadata stored in the superclass.
+ *
+ * @param op
+ * scalar operation to perform
+ * @return transformed copy of value metadata for this column group
+ * @throws DMLRuntimeException
+ */
+ protected double[] applyScalarOp(ScalarOperator op)
+ throws DMLRuntimeException
+ {
+ //scan over linearized values
+ double[] ret = new double[_values.length];
+ for (int i = 0; i < _values.length; i++) {
+ ret[i] = op.executeScalar(_values[i]);
+ }
+
+ return ret;
+ }
+
+ /**
+ *
+ * @param op
+ * @param newVal
+ * @param numCols
+ * @return
+ * @throws DMLRuntimeException
+ */
+ protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
+ throws DMLRuntimeException
+ {
+ //scan over linearized values
+ double[] ret = new double[_values.length + numCols];
+ for( int i = 0; i < _values.length; i++ ) {
+ ret[i] = op.executeScalar(_values[i]);
+ }
+
+ //add new value to the end
+ Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
+
+ return ret;
+ }
+
+ /**
+ * @return the number of distinct sets of values associated with the bitmaps
+ * in this column group
+ */
+ public int getNumValues() {
+ return _values.length / _colIndexes.length;
+ }
+
+ /**
+ *
+ * @return
+ */
+ public double[] getValues() {
+ return _values;
+ }
+
+ /**
+ *
+ * @return
+ */
+ public char[] getBitmaps() {
+ return _data;
+ }
+
+ public int[] getBitmapOffsets() {
+ return _ptr;
+ }
+
+ /**
+ * @param bmpIx
+ * index of a specific compressed bitmap (stored in subclass,
+ * index same as {@link #values})
+ * @return an object for iterating over the row offsets in this bitmap. Only
+ * valid until the next call to this method. May be reused across
+ * calls.
+ */
+ public abstract Iterator<Integer> getDecodeIterator(int bmpIx);
+
+ /**
+ * Utility function of sparse-unsafe operations.
+ *
+ * @param ind
+ * @return
+ * @throws DMLRuntimeException
+ */
+ protected int[] computeOffsets(boolean[] ind)
+ throws DMLRuntimeException
+ {
+ //determine number of offsets
+ int numOffsets = 0;
+ for( int i=0; i<ind.length; i++ )
+ numOffsets += ind[i] ? 1 : 0;
+
+ //create offset lists
+ int[] ret = new int[numOffsets];
+ for( int i=0, pos=0; i<ind.length; i++ )
+ if( ind[i] )
+ ret[pos++] = i;
+
+ return ret;
+ }
+
+ @Override
+ public void readFields(DataInput in)
+ throws IOException
+ {
+ _numRows = in.readInt();
+ int numCols = in.readInt();
+ int numVals = in.readInt();
+
+ //read col indices
+ _colIndexes = new int[ numCols ];
+ for( int i=0; i<numCols; i++ )
+ _colIndexes[i] = in.readInt();
+
+ //read distinct values
+ _values = new double[numVals*numCols];
+ for( int i=0; i<numVals*numCols; i++ )
+ _values[i] = in.readDouble();
+
+ //read bitmaps
+ int totalLen = in.readInt();
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ for( int i=0, off=0; i<numVals; i++ ) {
+ int len = in.readInt();
+ _ptr[i] = off;
+ for( int j=0; j<len; j++ )
+ _data[off+j] = in.readChar();
+ off += len;
+ }
+ _ptr[numVals] = totalLen;
+ }
+
+ @Override
+ public void write(DataOutput out)
+ throws IOException
+ {
+ int numCols = getNumCols();
+ int numVals = getNumValues();
+ out.writeInt(_numRows);
+ out.writeInt(numCols);
+ out.writeInt(numVals);
+
+ //write col indices
+ for( int i=0; i<_colIndexes.length; i++ )
+ out.writeInt( _colIndexes[i] );
+
+ //write distinct values
+ for( int i=0; i<_values.length; i++ )
+ out.writeDouble(_values[i]);
+
+ //write bitmaps (lens and data, offset later recreated)
+ int totalLen = 0;
+ for( int i=0; i<numVals; i++ )
+ totalLen += len(i);
+ out.writeInt(totalLen);
+ for( int i=0; i<numVals; i++ ) {
+ int len = len(i);
+ int off = _ptr[i];
+ out.writeInt(len);
+ for( int j=0; j<len; j++ )
+ out.writeChar(_data[off+j]);
+ }
+ }
+
+ @Override
+ public long getExactSizeOnDisk() {
+ long ret = 12; //header
+ //col indices
+ ret += 4 * _colIndexes.length;
+ //distinct values (groups of values)
+ ret += 8 * _values.length;
+ //actual bitmaps
+ ret += 4; //total length
+ for( int i=0; i<getNumValues(); i++ )
+ ret += 4 + 2 * len(i);
+
+ return ret;
+ }
+}