You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/17 00:23:31 UTC
[6/6] incubator-systemml git commit: [SYSTEMML-810] New compressed
matrix blocks and operations, tests
[SYSTEMML-810] New compressed matrix blocks and operations, tests
For details, see https://issues.apache.org/jira/browse/SYSTEMML-449.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/16e7b1c8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/16e7b1c8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/16e7b1c8
Branch: refs/heads/master
Commit: 16e7b1c88e45e007d1db229717311fcf70bc6b19
Parents: 71013e7
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Jul 16 00:43:16 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jul 16 17:22:50 2016 -0700
----------------------------------------------------------------------
.../runtime/compress/BitmapDecoderOLE.java | 127 ++
.../runtime/compress/BitmapDecoderRLE.java | 117 ++
.../sysml/runtime/compress/BitmapEncoder.java | 392 +++++
.../apache/sysml/runtime/compress/ColGroup.java | 259 ++++
.../sysml/runtime/compress/ColGroupBitmap.java | 498 +++++++
.../sysml/runtime/compress/ColGroupOLE.java | 634 +++++++++
.../sysml/runtime/compress/ColGroupRLE.java | 617 ++++++++
.../runtime/compress/ColGroupUncompressed.java | 360 +++++
.../runtime/compress/CompressedMatrixBlock.java | 1342 ++++++++++++++++++
.../runtime/compress/PlanningBinPacker.java | 191 +++
.../sysml/runtime/compress/PlanningCoCoder.java | 227 +++
.../runtime/compress/PlanningCoCodingGroup.java | 104 ++
.../compress/PlanningGroupMergeAction.java | 73 +
.../runtime/compress/ReaderColumnSelection.java | 64 +
.../compress/ReaderColumnSelectionDense.java | 68 +
.../ReaderColumnSelectionDenseSample.java | 86 ++
.../compress/ReaderColumnSelectionSparse.java | 115 ++
.../runtime/compress/UncompressedBitmap.java | 101 ++
.../compress/estim/CompressedSizeEstimator.java | 145 ++
.../estim/CompressedSizeEstimatorExact.java | 53 +
.../estim/CompressedSizeEstimatorSample.java | 767 ++++++++++
.../compress/estim/CompressedSizeInfo.java | 69 +
.../compress/estim/SizeEstimatorFactory.java | 40 +
.../runtime/compress/utils/ConverterUtils.java | 99 ++
.../sysml/runtime/compress/utils/DblArray.java | 91 ++
.../compress/utils/DblArrayIntListHashMap.java | 179 +++
.../compress/utils/DoubleIntListHashMap.java | 181 +++
.../runtime/compress/utils/IntArrayList.java | 102 ++
.../compress/utils/LinearAlgebraUtils.java | 383 +++++
.../runtime/functionobjects/KahanFunction.java | 8 +
.../runtime/functionobjects/KahanPlus.java | 5 +
.../runtime/functionobjects/KahanPlusSq.java | 15 +-
.../runtime/matrix/data/LibMatrixMult.java | 27 +-
.../sysml/runtime/matrix/data/MatrixBlock.java | 2 +-
.../compress/BasicCompressionTest.java | 168 +++
.../compress/BasicMatrixAppendTest.java | 176 +++
.../compress/BasicMatrixMultChainTest.java | 245 ++++
.../BasicMatrixTransposeSelfMultTest.java | 172 +++
.../compress/BasicMatrixVectorMultTest.java | 180 +++
.../BasicScalarOperationsSparseUnsafeTest.java | 177 +++
.../compress/BasicScalarOperationsTest.java | 177 +++
.../BasicTransposeSelfLeftMatrixMultTest.java | 172 +++
.../compress/BasicUnaryAggregateTest.java | 544 +++++++
.../compress/BasicVectorMatrixMultTest.java | 180 +++
.../functions/compress/CompressedLinregCG.java | 151 ++
.../compress/CompressedSerializationTest.java | 185 +++
.../compress/LargeCompressionTest.java | 169 +++
.../compress/LargeMatrixVectorMultTest.java | 180 +++
.../compress/LargeParMatrixVectorMultTest.java | 182 +++
.../compress/LargeVectorMatrixMultTest.java | 180 +++
.../compress/ParMatrixMultChainTest.java | 247 ++++
.../compress/ParMatrixVectorMultTest.java | 182 +++
.../ParTransposeSelfLeftMatrixMultTest.java | 174 +++
.../compress/ParUnaryAggregateTest.java | 547 +++++++
.../compress/ParVectorMatrixMultTest.java | 181 +++
src/test/scripts/functions/compress/LinregCG.R | 57 +
.../scripts/functions/compress/LinregCG.dml | 56 +
.../compress/SystemML-config-compress.xml | 59 +
.../functions/compress/ZPackageSuite.java | 56 +
59 files changed, 12322 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
new file mode 100644
index 0000000..fc6e861
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ *
+ */
+public final class BitmapDecoderOLE implements Iterator<Integer>
+{
+ // pointer to the compressed bitmap
+ private int _bmOff;
+ private int _bmLen;
+ private char[] _bmPtr;
+
+ // The index of the current block. Block 0 covers bits 1 through 2^16
+ private int _blockIx;
+
+ // The offset where the current block starts within the bitmap.
+ private int _blockStartOffset;
+
+ // The number of offsets in the current block.
+ private int _curBlockSize;
+
+ // The offset <b>in the current block</b> the <b>next</b> element we will
+ // read from the bitmap, or bmPtr.length if we are done.
+ private int _nextBmOffset;
+
+ /**
+ * Point this object at the beginning of a particular bitmap. After a call
+ * to this method, the next call to {@link #nextOffset()} will return the
+ * offset of the first bit in the specified bitmap.
+ *
+ * @param bmPtr
+ * pointer to a compressed bitmap
+ */
+ public BitmapDecoderOLE(char[] bmPtr, int off, int len) {
+ _bmOff = off;
+ _bmLen = len;
+ _bmPtr = bmPtr;
+ _blockIx = 0;
+ _blockStartOffset = 0;
+ _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+ if (_curBlockSize < 0) {
+ throw new RuntimeException(String.format(
+ "Negative block size %d at position %d of %s",
+ _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr)));
+ }
+ _nextBmOffset = 0;
+
+ // Advance past any zero-length blocks at the beginning of the array
+ while (_blockStartOffset < _bmLen
+ && _nextBmOffset >= _curBlockSize) {
+ advanceToNextBlock();
+ }
+ }
+
+ @Override
+ public Integer next() {
+ if( !hasNext() )
+ throw new RuntimeException("No next offset existing.");
+
+ // Grab the lookahead value Note the +1 in the array indexing;
+ // the first number in a block is the block size
+ int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset];
+ int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ)
+ + offsetFromBlockBegin;
+ _nextBmOffset++;
+
+ // Advance to next non-empty block if we reached the end of the block.
+ while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) {
+ advanceToNextBlock();
+ }
+
+ return ret;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return _blockStartOffset < _bmLen;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented for BitmapDecoderOLE.");
+ }
+
+ /**
+ * Move forward to the next block. Does not skip empty blocks.
+ */
+ private void advanceToNextBlock() {
+ _blockStartOffset += (1 + _curBlockSize);
+ _blockIx++;
+ if (_blockStartOffset >= _bmLen) {
+ // Read past last block
+ return;
+ }
+
+ _curBlockSize = _bmPtr[_bmOff+_blockStartOffset];
+ if (_curBlockSize < 0) {
+ throw new RuntimeException(String.format(
+ "Negative block size %d at position %d of %s",
+ _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr)));
+ }
+ _nextBmOffset = 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
new file mode 100644
index 0000000..54f24ae
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Iterator;
+
+/**
+ * General-purpose iterator to decode a compressed OLE bitmap.
+ *
+ */
+public final class BitmapDecoderRLE implements Iterator<Integer>
+{
+ // pointer to the compressed bitmap
+ private int _bmOff;
+ private int _bmLen;
+ private char[] _bmPtr;
+
+ // The offset of the <b>next</b> element we will read from the bitmap, or
+ // bmPtr.length if we are done.
+ private int _nextBmOffset;
+
+ // The offset in the matrix column of the beginning of the current run
+ private int _runStartOffset;
+
+ // The length of the current run
+ private int _curRunLen;
+
+ // The number of bits that we have returned from the current run.
+ private int _runBitsReturned;
+
+ /**
+ * Point this object at the beginning of a particular bitmap. After a call
+ * to this method, the next call to {@link #nextOffset()} will return the
+ * offset of the first bit in the specified bitmap.
+ *
+ * @param bmPtr
+ * pointer to a compressed bitmap
+ */
+ public BitmapDecoderRLE(char[] bmPtr, int off, int len) {
+ _bmOff = off;
+ _bmLen = len;
+ _bmPtr = bmPtr;
+ _nextBmOffset = 0;
+ _runStartOffset = 0;
+ _curRunLen = 0;
+ _runBitsReturned = 0;
+
+ if (0 == _bmLen) {
+ return; //no runs
+ }
+
+ // Advance to the beginning of the first non-empty run.
+ advanceToNextRun();
+ }
+
+ @Override
+ public Integer next() {
+ if( !hasNext() )
+ throw new RuntimeException("No next offset existing.");
+
+ // Grab the lookahead value
+ int ret = _runStartOffset + _runBitsReturned;
+
+ _runBitsReturned++;
+
+ // Check for end of run
+ if (_runBitsReturned == _curRunLen) {
+ advanceToNextRun();
+ }
+
+ return ret;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return _runBitsReturned < _curRunLen;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented for BitmapDecoderRLE.");
+ }
+
+ /** Move forward to the next non-empty run. */
+ private void advanceToNextRun() {
+ // While loop needed because some runs are of length 0
+ while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) {
+
+ _runBitsReturned = 0;
+
+ // Read the distance to the next run
+ char delta = _bmPtr[_bmOff + _nextBmOffset];
+
+ // Run length is stored in the next element of the array
+ _runStartOffset += delta + _curRunLen;
+ _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1];
+ _nextBmOffset += 2;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
new file mode 100644
index 0000000..8a535e1
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java
@@ -0,0 +1,392 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.DblArray;
+import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap;
+import org.apache.sysml.runtime.compress.utils.IntArrayList;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+
+/**
+ * Static functions for encoding bitmaps in various ways.
+ *
+ */
+public class BitmapEncoder
+{
+ /** Size of the blocks used in a blocked bitmap representation. */
+ public static final int BITMAP_BLOCK_SZ = 65536;
+
+ /**
+ * Generate uncompressed bitmaps for a set of columns in an uncompressed
+ * matrix block.
+ *
+ * @param colIndices
+ * indexes (within the block) of the columns to extract
+ * @param rawblock
+ * an uncompressed matrix block; can be dense or sparse
+ * @return uncompressed bitmap representation of the columns
+ * @throws DMLRuntimeException
+ */
+ public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock)
+ {
+ //note: no sparse column selection reader because low potential
+ //single column selection
+ if( colIndices.length==1 ) {
+ return extractBitmap(colIndices[0], rawblock,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ }
+ //multiple column selection (general case)
+ else {
+ ReaderColumnSelection reader = null;
+ if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT )
+ reader = new ReaderColumnSelectionSparse(rawblock, colIndices,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ else
+ reader = new ReaderColumnSelectionDense(rawblock, colIndices,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+
+ return extractBitmap(colIndices, rawblock, reader);
+ }
+ }
+
+ /**
+ *
+ * @param colIndices
+ * @param rawblock
+ * @param sampleIndexes
+ * @return
+ */
+ public static UncompressedBitmap extractBitmapFromSample(int[] colIndices,
+ MatrixBlock rawblock, int[] sampleIndexes)
+ {
+ //note: no sparse column selection reader because low potential
+
+ //single column selection
+ if( colIndices.length==1 ) {
+ return extractBitmap(colIndices[0], rawblock, sampleIndexes,
+ !CompressedMatrixBlock.MATERIALIZE_ZEROS);
+ }
+ //multiple column selection (general case)
+ else {
+ return extractBitmap(colIndices, rawblock,
+ new ReaderColumnSelectionDenseSample(rawblock, colIndices,
+ sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS));
+ }
+ }
+
+ /**
+ * Encodes the bitmap as a series of run lengths and offsets.
+ * <p>
+ * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE}
+ * !</b>
+ *
+ * @param offsets
+ * uncompressed contents of the bitmap, expressed as a list of
+ * the offsets of different bits
+ * @return compressed version of said bitmap
+ */
+ public static char[] genRLEBitmap(int[] offsets) {
+ if( offsets.length == 0 )
+ return new char[0]; //empty list
+
+ // Use an ArrayList for correctness at the expense of temp space
+ ArrayList<Character> buf = new ArrayList<Character>();
+
+ // 1 + (position of last 1 in the previous run of 1's)
+ // We add 1 because runs may be of length zero.
+ int lastRunEnd = 0;
+
+ // Offset between the end of the previous run of 1's and the first 1 in
+ // the current run. Initialized below.
+ int curRunOff;
+
+ // Length of the most recent run of 1's
+ int curRunLen = 0;
+
+ // Current encoding is as follows:
+ // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
+ // Positive entry: Entry encodes offset to next run of 1's. The next
+ // entry in the bitmap holds a run length.
+
+ // Special-case the first run to simplify the loop below.
+ int firstOff = offsets[0];
+
+ // The first run may start more than a short's worth of bits in
+ while (firstOff > Character.MAX_VALUE) {
+ buf.add(Character.MAX_VALUE);
+ buf.add((char) 0);
+ firstOff -= Character.MAX_VALUE;
+ lastRunEnd += Character.MAX_VALUE;
+ }
+
+ // Create the first run with an initial size of 1
+ curRunOff = firstOff;
+ curRunLen = 1;
+
+ // Process the remaining offsets
+ for (int i = 1; i < offsets.length; i++) {
+
+ int absOffset = offsets[i];
+
+ // 1 + (last position in run)
+ int curRunEnd = lastRunEnd + curRunOff + curRunLen;
+
+ if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
+ // End of a run, either because we hit a run of 0's or because the
+ // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
+ buf.add((char) curRunOff);
+ buf.add((char) curRunLen);
+
+ lastRunEnd = curRunEnd;
+ curRunOff = absOffset - lastRunEnd;
+
+ while (curRunOff > Character.MAX_VALUE) {
+ // SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
+ // Add zero-length runs until the offset is small enough.
+ buf.add(Character.MAX_VALUE);
+ buf.add((char) 0);
+ lastRunEnd += Character.MAX_VALUE;
+ curRunOff -= Character.MAX_VALUE;
+ }
+
+ curRunLen = 1;
+ } else {
+ // Middle of a run
+ curRunLen++;
+ }
+ }
+
+ // Close out the last run
+ if (curRunLen >= 1) {
+ buf.add((char) curRunOff);
+ buf.add((char) curRunLen);
+ }
+
+ // Convert wasteful ArrayList to packed array.
+ char[] ret = new char[buf.size()];
+ for (int i = 0; i < buf.size(); i++) {
+ ret[i] = buf.get(i);
+ }
+ return ret;
+ }
+
+ /**
+ * Encodes the bitmap in blocks of offsets. Within each block, the bits are
+ * stored as absolute offsets from the start of the block.
+ *
+ * @param offsets
+ * uncompressed contents of the bitmap, expressed as a list of
+ * the offsets of different bits
+ * @return compressed version of said bitmap
+ */
+ public static char[] genOffsetBitmap(int[] offsets)
+ {
+ int lastOffset = offsets[offsets.length - 1];
+
+ // Build up the blocks
+ int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
+ // To simplify the logic, we make two passes.
+ // The first pass divides the offsets by block.
+ int[] blockLengths = new int[numBlocks];
+ Arrays.fill(blockLengths, 0);
+
+ for (int ix = 0; ix < offsets.length; ix++) {
+ int val = offsets[ix];
+ int blockForVal = val / BITMAP_BLOCK_SZ;
+
+ blockLengths[blockForVal]++;
+ }
+
+ // The second pass creates the blocks.
+ int totalSize = numBlocks;
+ for (int block = 0; block < numBlocks; block++) {
+ totalSize += blockLengths[block];
+ }
+ char[] encodedBlocks = new char[totalSize];
+
+ int inputIx = 0;
+ int blockStartIx = 0;
+ for (int block = 0; block < numBlocks; block++) {
+ int blockSz = blockLengths[block];
+
+ // First entry in the block is number of bits
+ encodedBlocks[blockStartIx] = (char) blockSz;
+
+ for (int i = 0; i < blockSz; i++) {
+ encodedBlocks[blockStartIx + i + 1] = (char)
+ (offsets[inputIx+i] % BITMAP_BLOCK_SZ);
+ }
+
+ inputIx += blockSz;
+ blockStartIx += blockSz + 1;
+ }
+
+ return encodedBlocks;
+ }
+
+ /**
+ *
+ * @param colIndex
+ * @param rawblock
+ * @return
+ * @throws DMLRuntimeException
+ */
+ private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros)
+ {
+ //probe map for distinct items (for value or value groups)
+ DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.getNumColumns():rawblock.getNumRows();
+
+ if( rawblock.isInSparseFormat() //SPARSE
+ && CompressedMatrixBlock.TRANSPOSE_INPUT )
+ {
+ SparseBlock a = rawblock.getSparseBlock();
+ if( a != null && !a.isEmpty(colIndex) )
+ {
+ int apos = a.pos(colIndex);
+ int alen = a.size(colIndex);
+ int[] aix = a.indexes(colIndex);
+ double[] avals = a.values(colIndex);
+
+ IntArrayList lstPtr0 = new IntArrayList(); //for 0 values
+ int last = -1;
+ //iterate over non-zero entries but fill in zeros
+ for( int j=apos; j<apos+alen; j++ )
+ {
+ //fill in zero values
+ if( !skipZeros )
+ for( int k=last+1; k<aix[j]; k++ )
+ lstPtr0.appendValue(k);
+ //handle non-zero value
+ IntArrayList lstPtr = distinctVals.get(avals[j]);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(avals[j], lstPtr);
+ }
+ lstPtr.appendValue(aix[j]);
+ last = aix[j];
+ }
+ //fill in remaining zero values
+ if( !skipZeros ) {
+ for( int k=last+1; k<m; k++ )
+ lstPtr0.appendValue(k);
+ if( lstPtr0.size()>0 )
+ distinctVals.appendValue(0, lstPtr0);
+ }
+ }
+ else if( !skipZeros ) { //full 0 column
+ IntArrayList lstPtr = new IntArrayList();
+ for( int i=0; i<m; i++ )
+ lstPtr.appendValue(i);
+ distinctVals.appendValue(0, lstPtr);
+ }
+ }
+ else //GENERAL CASE
+ {
+ for( int i=0; i<m; i++ ) {
+ double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.quickGetValue(colIndex, i):
+ rawblock.quickGetValue(i, colIndex);
+ if( val!=0 || !skipZeros ) {
+ IntArrayList lstPtr = distinctVals.get(val);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(val, lstPtr);
+ }
+ lstPtr.appendValue(i);
+ }
+ }
+ }
+
+ return new UncompressedBitmap(distinctVals);
+ }
+
+ /**
+ *
+ * @param colIndex
+ * @param rawblock
+ * @param sampleIndexes
+ * @return
+ */
+ private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros)
+ {
+ //note: general case only because anyway binary search for small samples
+
+ //probe map for distinct items (for value or value groups)
+ DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ final int m = sampleIndexes.length;
+ for( int i=0; i<m; i++ ) {
+ int rowIndex = sampleIndexes[i];
+ double val = CompressedMatrixBlock.TRANSPOSE_INPUT ?
+ rawblock.quickGetValue(colIndex, rowIndex) :
+ rawblock.quickGetValue(rowIndex, colIndex);
+ if( val!=0 || !skipZeros ) {
+ IntArrayList lstPtr = distinctVals.get(val);
+ if( lstPtr == null ) {
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(val, lstPtr);
+ }
+ lstPtr.appendValue(i);
+ }
+ }
+
+ return new UncompressedBitmap(distinctVals);
+ }
+
+ /**
+ *
+ * @param colIndices
+ * @param rawblock
+ * @param rowReader
+ * @return
+ */
+ private static UncompressedBitmap extractBitmap(int[] colIndices,
+ MatrixBlock rawblock, ReaderColumnSelection rowReader)
+ {
+ //probe map for distinct items (for value or value groups)
+ DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
+
+ //scan rows and probe/build distinct items
+ DblArray cellVals = null;
+ while ((cellVals = rowReader.nextRow()) != null) {
+ IntArrayList lstPtr = distinctVals.get(cellVals);
+ if (lstPtr == null) {
+ //create new objects only on demand
+ lstPtr = new IntArrayList();
+ distinctVals.appendValue(new DblArray(cellVals), lstPtr);
+ }
+ lstPtr.appendValue(rowReader.getCurrentRowIndex());
+ }
+
+ return new UncompressedBitmap(distinctVals, colIndices.length);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
new file mode 100644
index 0000000..647a701
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class that stores information about a column group within a compressed matrix
+ * block. There are subclasses specific to each compression type.
+ *
+ */
+public abstract class ColGroup implements Serializable
+{
+ private static final long serialVersionUID = 2439785418908671481L;
+
+ public enum CompressionType {
+ UNCOMPRESSED, //uncompressed sparse/dense
+ RLE_BITMAP, //RLE bitmap
+ OLE_BITMAP; //OLE bitmap
+ }
+
+ /**
+ * Offsets of the columns that make up the column group. Zero-based, and
+ * relative to the matrix block.
+ */
+ protected int[] _colIndexes;
+
+ /** Number of rows in the matrix, for use by child classes. */
+ protected int _numRows;
+
+ /** How the elements of the column group are compressed. */
+ private CompressionType _compType;
+
+
+ /**
+ * Main constructor.
+ *
+ * @param colIndices
+ * offsets of the columns in the matrix block that make up the
+ * group
+ * @param numRows
+ * total number of rows in the parent block
+ */
+ protected ColGroup(CompressionType type, int[] colIndices, int numRows) {
+ _compType = type;
+ _colIndexes = colIndices;
+ _numRows = numRows;
+ }
+
+ /** Convenience constructor for converting indices to a more compact format. */
+ protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) {
+ _compType = type;
+ _colIndexes = new int[colIndicesList.size()];
+ int i = 0;
+ for (Integer index : colIndicesList)
+ _colIndexes[i++] = index;
+ }
+
+ /**
+ * @return offsets of the columns in the matrix block that make up the group
+ */
+ public int[] getColIndices() {
+ return _colIndexes;
+ }
+
+ /**
+ * @param col
+ * an index from 0 to the number of columns in this group - 1
+ * @return offset of the specified column in the matrix block that make up
+ * the group
+ */
+ public int getColIndex(int colNum) {
+ return _colIndexes[colNum];
+ }
+
+ public int getNumRows() {
+ return _numRows;
+ }
+
+ /**
+ * @return number of columns in this column group
+ */
+ public int getNumCols() {
+ return _colIndexes.length;
+ }
+
+ /**
+ * @return How the elements of the column group are compressed.
+ */
+ public CompressionType getCompType() {
+ return _compType;
+ }
+
+ /**
+ *
+ * @param offset
+ */
+ public void shiftColIndices(int offset) {
+ for( int i=0; i<_colIndexes.length; i++ )
+ _colIndexes[i] += offset;
+ }
+
+ /**
+ * Note: Must be overridden by child classes to account for additional data
+ * and metadata
+ *
+ * @return an upper bound on the number of bytes used to store this ColGroup
+ * in memory.
+ */
+ public long estimateInMemorySize() {
+ // int numRows (4B) , array reference colIndices (8B) + array object
+ // overhead if exists (32B) + 4B per element, CompressionType compType
+ // (2 booleans 2B + enum overhead 32B + reference to enum 8B)
+ long size = 54;
+ if (_colIndexes == null)
+ return size;
+ else
+ return size + 32 + 4 * _colIndexes.length;
+ }
+
+ /**
+ * Decompress the contents of this column group into the specified full
+ * matrix block.
+ *
+ * @param target
+ * a matrix block where the columns covered by this column group
+ * have not yet been filled in.
+ */
+ public abstract void decompressToBlock(MatrixBlock target);
+
+ /**
+ * Decompress the contents of this column group into uncompressed packed
+ * columns
+ *
+ * @param target
+ * a dense matrix block. The block must have enough space to hold
+ * the contents of this column group.
+ * @param colIndexTargets
+ * array that maps column indices in the original matrix block to
+ * columns of target.
+ */
+ public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
+
+ /**
+ *
+ * @param target dense output vector
+ * @param colpos column to decompress, error if larger or equal numCols
+ */
+ public abstract void decompressToBlock(MatrixBlock target, int colpos);
+
+
+ /**
+ * Serializes column group to data output.
+ *
+ * @param out
+ * @throws IOException
+ */
+ public abstract void write(DataOutput out)
+ throws IOException;
+
+ /**
+ * Deserializes column group from data input.
+ *
+ * @param in
+ * @throws IOException
+ */
+ public abstract void readFields(DataInput in)
+ throws IOException;
+
+
+ /**
+ * Returns the exact serialized size of column group.
+ * This can be used for example for buffer preallocation.
+ *
+ * @return
+ */
+ public abstract long getExactSizeOnDisk();
+
+ /**
+ * Multiply the slice of the matrix that this column group represents by a
+ * vector on the right.
+ *
+ * @param vector
+ * vector to multiply by (tall vector)
+ * @param result
+ * accumulator for holding the result
+ * @throws DMLRuntimeException
+ * if the internal SystemML code that performs the
+ * multiplication experiences an error
+ */
+ public abstract void rightMultByVector(MatrixBlock vector,
+ MatrixBlock result, int rl, int ru) throws DMLRuntimeException;
+
+
+ /**
+ * Multiply the slice of the matrix that this column group represents by a
+ * row vector on the left (the original column vector is assumed to be
+ * transposed already i.e. its size now is 1xn).
+ *
+ * @param vector
+ * @param result
+ * @throws DMLRuntimeException
+ */
+ public abstract void leftMultByRowVector(MatrixBlock vector,
+ MatrixBlock result) throws DMLRuntimeException;
+
+ /**
+ * Perform the specified scalar operation directly on the compressed column
+ * group, without decompressing individual cells if possible.
+ *
+ * @param op
+ * operation to perform
+ * @return version of this column group with the operation applied
+ */
+ public abstract ColGroup scalarOperation(ScalarOperator op)
+ throws DMLRuntimeException;
+
+ /**
+ *
+ * @param op
+ * @param result
+ * @throws DMLUnsupportedOperationException
+ * @throws DMLRuntimeException
+ */
+ public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
+ throws DMLRuntimeException;
+
+ /**
+ *
+ * @param rnnz
+ */
+ protected abstract void countNonZerosPerRow(int[] rnnz);
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
new file mode 100644
index 0000000..5d60a49
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java
@@ -0,0 +1,498 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+
+/**
+ * Base class for column groups encoded with various types of bitmap encoding.
+ *
+ *
+ * NOTES:
+ * * OLE: separate storage segment length and bitmaps led to a 30% improvement
+ * but not applied because more difficult to support both data layouts at the
+ * same time (distributed/local as well as w/ and w/o low-level opt)
+ */
+public abstract class ColGroupBitmap extends ColGroup
+{
+ private static final long serialVersionUID = -1635828933479403125L;
+
+ public static final boolean LOW_LEVEL_OPT = true;
+ //sorting of values by physical length helps by 10-20%, especially for serial, while
+ //slight performance decrease for parallel incl multi-threaded, hence not applied for
+ //distributed operations (also because compression time + garbage collection increases)
+ private static final boolean SORT_VALUES_BY_LENGTH = true;
+ protected static final boolean CREATE_SKIPLIST = true;
+
+ protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+ protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
+
+ /** Distinct values associated with individual bitmaps. */
+ protected double[] _values; //linearized <numcol vals> <numcol vals>
+
+ /** Bitmaps, one per uncompressed value in {@link #values}. */
+ protected int[] _ptr; //bitmap offsets per value
+ protected char[] _data; //linearized bitmaps (variable length)
+
+ protected int[] _skiplist;
+
+ public ColGroupBitmap(CompressionType type) {
+ super(type, (int[]) null, -1);
+ }
+
+ /**
+ * Main constructor. Stores the headers for the individual bitmaps.
+ *
+ * @param colIndices
+ * indices (within the block) of the columns included in this
+ * column
+ * @param numRows
+ * total number of rows in the parent block
+ * @param ubm
+ * Uncompressed bitmap representation of the block
+ */
+ public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm)
+ {
+ super(type, colIndices, numRows);
+
+ // Extract and store just the distinct values. The bitmaps themselves go
+ // into the subclasses.
+ final int numCols = ubm.getNumColumns();
+ final int numVals = ubm.getNumValues();
+
+ _values = new double[numVals*numCols];
+ for (int i=0; i<numVals; i++) {
+ //note: deep copied internally on getValues
+ double[] tmp = ubm.getValues(i);
+ System.arraycopy(tmp, 0, _values, i*numCols, numCols);
+ }
+ }
+
+ /**
+ * Constructor for subclass methods that need to create shallow copies
+ *
+ * @param colIndices
+ * raw column index information
+ * @param numRows
+ * number of rows in the block
+ * @param values
+ * set of distinct values for the block (associated bitmaps are
+ * kept in the subclass)
+ */
+ protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) {
+ super(type, colIndices, numRows);
+ _values = values;
+ }
+
+ protected final int len(int k) {
+ return _ptr[k+1] - _ptr[k];
+ }
+
+ /**
+ *
+ * @param numVals
+ * @param totalLen
+ * @param lbitmaps
+ */
+ protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps)
+ {
+ // compact bitmaps to linearized representation
+ if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH
+ && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
+ {
+ // sort value by num segments in descending order
+ TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>();
+ for( int i=0; i<numVals; i++ ) {
+ int revlen = totalLen-lbitmaps[i].length;
+ if( !tree.containsKey(revlen) )
+ tree.put(revlen, new ArrayList<Integer>());
+ tree.get(revlen).add(i);
+ }
+
+ // compact bitmaps to linearized representation
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ int pos = 0, off = 0;
+ for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+ for( Integer tmpix : e.getValue() ) {
+ int len = lbitmaps[tmpix].length;
+ _ptr[pos] = off;
+ System.arraycopy(lbitmaps[tmpix], 0, _data, off, len);
+ off += len;
+ pos++;
+ }
+ }
+ _ptr[numVals] = totalLen;
+
+ // reorder values
+ double[] lvalues = new double[_values.length];
+ int off2 = 0; int numCols = _colIndexes.length;
+ for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) {
+ for( Integer tmpix : e.getValue() ) {
+ System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols);
+ off2 += numCols;
+ }
+ }
+ _values = lvalues;
+ }
+ else
+ {
+ // compact bitmaps to linearized representation
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ for( int i=0, off=0; i<numVals; i++ ) {
+ int len = lbitmaps[i].length;
+ _ptr[i] = off;
+ System.arraycopy(lbitmaps[i], 0, _data, off, len);
+ off += len;
+ }
+ _ptr[numVals] = totalLen;
+ }
+ }
+
+ @Override
+ public long estimateInMemorySize() {
+ long size = super.estimateInMemorySize();
+
+ // adding the size of values
+ size += 8; //array reference
+ if (_values != null) {
+ size += 32 + _values.length * 8; //values
+ }
+
+ // adding bitmaps size
+ size += 16; //array references
+ if (_data != null) {
+ size += 32 + _ptr.length * 4; // offsets
+ size += 32 + _data.length * 2; // bitmaps
+ }
+
+ return size;
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ int[] colIndices = getColIndices();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ for (int colIx = 0; colIx < numCols; colIx++) {
+ target.appendValue(row, colIndices[colIx], _values[valOff+colIx]);
+ }
+ }
+ }
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target, int[] colIndexTargets)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ for (int colIx = 0; colIx < numCols; colIx++) {
+ int origMatrixColIx = getColIndex(colIx);
+ int targetColIx = colIndexTargets[origMatrixColIx];
+ target.quickSetValue(row, targetColIx, _values[valOff+colIx]);
+ }
+ }
+ }
+ }
+
+ //generic decompression for OLE/RLE, to be overwritten for performance
+ @Override
+ public void decompressToBlock(MatrixBlock target, int colpos)
+ {
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+
+ // Run through the bitmaps for this column group
+ for (int i = 0; i < numVals; i++) {
+ Iterator<Integer> decoder = getDecodeIterator(i);
+ int valOff = i*numCols;
+
+ while (decoder.hasNext()) {
+ int row = decoder.next();
+ target.quickSetValue(row, 0, _values[valOff+colpos]);
+ }
+ }
+ }
+
+ /**
+ *
+ * @param bitmapIx
+ * @return
+ */
+ protected double sumValues(int bitmapIx)
+ {
+ final int numCols = getNumCols();
+ final int valOff = bitmapIx * numCols;
+
+ double val = 0.0;
+ for( int i = 0; i < numCols; i++ ) {
+ val += _values[valOff+i];
+ }
+
+ return val;
+ }
+
+ protected double sumValues(int bitmapIx, double[] b)
+ {
+ final int numCols = getNumCols();
+ final int valOff = bitmapIx * numCols;
+
+ double val = 0;
+ for( int i = 0; i < numCols; i++ ) {
+ val += _values[valOff+i] * b[i];
+ }
+
+ return val;
+ }
+
+ /**
+ *
+ * @param b
+ * @param c
+ */
+ protected void sumAllValues(double[] b, double[] c)
+ {
+ final int numVals = getNumValues();
+ final int numCols = getNumCols();
+
+ //vectMultiplyAdd over cols instead of dotProduct over vals because
+ //usually more values than columns
+ for( int i=0, off=0; i<numCols; i++, off+=numVals )
+ LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals);
+ }
+
+ /**
+ * Method for use by subclasses. Applies a scalar operation to the value
+ * metadata stored in the superclass.
+ *
+ * @param op
+ * scalar operation to perform
+ * @return transformed copy of value metadata for this column group
+ * @throws DMLRuntimeException
+ */
+ protected double[] applyScalarOp(ScalarOperator op)
+ throws DMLRuntimeException
+ {
+ //scan over linearized values
+ double[] ret = new double[_values.length];
+ for (int i = 0; i < _values.length; i++) {
+ ret[i] = op.executeScalar(_values[i]);
+ }
+
+ return ret;
+ }
+
+ /**
+ *
+ * @param op
+ * @param newVal
+ * @param numCols
+ * @return
+ * @throws DMLRuntimeException
+ */
+ protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols)
+ throws DMLRuntimeException
+ {
+ //scan over linearized values
+ double[] ret = new double[_values.length + numCols];
+ for( int i = 0; i < _values.length; i++ ) {
+ ret[i] = op.executeScalar(_values[i]);
+ }
+
+ //add new value to the end
+ Arrays.fill(ret, _values.length, _values.length+numCols, newVal);
+
+ return ret;
+ }
+
+ /**
+ * @return the number of distinct sets of values associated with the bitmaps
+ * in this column group
+ */
+ public int getNumValues() {
+ return _values.length / _colIndexes.length;
+ }
+
+ /**
+ *
+ * @return
+ */
+ public double[] getValues() {
+ return _values;
+ }
+
+ /**
+ *
+ * @return
+ */
+ public char[] getBitmaps() {
+ return _data;
+ }
+
+ public int[] getBitmapOffsets() {
+ return _ptr;
+ }
+
+ /**
+ * @param bmpIx
+ * index of a specific compressed bitmap (stored in subclass,
+ * index same as {@link #values})
+ * @return an object for iterating over the row offsets in this bitmap. Only
+ * valid until the next call to this method. May be reused across
+ * calls.
+ */
+ public abstract Iterator<Integer> getDecodeIterator(int bmpIx);
+
+ /**
+ * Utility function of sparse-unsafe operations.
+ *
+ * @param ind
+ * @return
+ * @throws DMLRuntimeException
+ */
+ protected int[] computeOffsets(boolean[] ind)
+ throws DMLRuntimeException
+ {
+ //determine number of offsets
+ int numOffsets = 0;
+ for( int i=0; i<ind.length; i++ )
+ numOffsets += ind[i] ? 1 : 0;
+
+ //create offset lists
+ int[] ret = new int[numOffsets];
+ for( int i=0, pos=0; i<ind.length; i++ )
+ if( ind[i] )
+ ret[pos++] = i;
+
+ return ret;
+ }
+
+ @Override
+ public void readFields(DataInput in)
+ throws IOException
+ {
+ _numRows = in.readInt();
+ int numCols = in.readInt();
+ int numVals = in.readInt();
+
+ //read col indices
+ _colIndexes = new int[ numCols ];
+ for( int i=0; i<numCols; i++ )
+ _colIndexes[i] = in.readInt();
+
+ //read distinct values
+ _values = new double[numVals*numCols];
+ for( int i=0; i<numVals*numCols; i++ )
+ _values[i] = in.readDouble();
+
+ //read bitmaps
+ int totalLen = in.readInt();
+ _ptr = new int[numVals+1];
+ _data = new char[totalLen];
+ for( int i=0, off=0; i<numVals; i++ ) {
+ int len = in.readInt();
+ _ptr[i] = off;
+ for( int j=0; j<len; j++ )
+ _data[off+j] = in.readChar();
+ off += len;
+ }
+ _ptr[numVals] = totalLen;
+ }
+
+ @Override
+ public void write(DataOutput out)
+ throws IOException
+ {
+ int numCols = getNumCols();
+ int numVals = getNumValues();
+ out.writeInt(_numRows);
+ out.writeInt(numCols);
+ out.writeInt(numVals);
+
+ //write col indices
+ for( int i=0; i<_colIndexes.length; i++ )
+ out.writeInt( _colIndexes[i] );
+
+ //write distinct values
+ for( int i=0; i<_values.length; i++ )
+ out.writeDouble(_values[i]);
+
+ //write bitmaps (lens and data, offset later recreated)
+ int totalLen = 0;
+ for( int i=0; i<numVals; i++ )
+ totalLen += len(i);
+ out.writeInt(totalLen);
+ for( int i=0; i<numVals; i++ ) {
+ int len = len(i);
+ int off = _ptr[i];
+ out.writeInt(len);
+ for( int j=0; j<len; j++ )
+ out.writeChar(_data[off+j]);
+ }
+ }
+
+ @Override
+ public long getExactSizeOnDisk() {
+ long ret = 12; //header
+ //col indices
+ ret += 4 * _colIndexes.length;
+ //distinct values (groups of values)
+ ret += 8 * _values.length;
+ //actual bitmaps
+ ret += 4; //total length
+ for( int i=0; i<getNumValues(); i++ )
+ ret += 4 + 2 * len(i);
+
+ return ret;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
new file mode 100644
index 0000000..a2b493a
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -0,0 +1,634 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.compress;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.utils.ConverterUtils;
+import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
+import org.apache.sysml.runtime.functionobjects.KahanFunction;
+import org.apache.sysml.runtime.functionobjects.KahanPlus;
+import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
+import org.apache.sysml.runtime.functionobjects.ReduceAll;
+import org.apache.sysml.runtime.functionobjects.ReduceCol;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
+import org.apache.sysml.runtime.instructions.cp.KahanObject;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with
+ * simple lists of offsets for each set of distinct values.
+ *
+ */
+public class ColGroupOLE extends ColGroupBitmap
+{
+ private static final long serialVersionUID = -9157676271360528008L;
+
+ public ColGroupOLE() {
+ super(CompressionType.OLE_BITMAP);
+ }
+
+ /**
+ * Main constructor. Constructs and stores the necessary bitmaps.
+ *
+ * @param colIndices
+ * indices (within the block) of the columns included in this
+ * column
+ * @param numRows
+ * total number of rows in the parent block
+ * @param ubm
+ * Uncompressed bitmap representation of the block
+ */
+ public ColGroupOLE(int[] colIndices, int numRows, UncompressedBitmap ubm)
+ {
+ super(CompressionType.OLE_BITMAP, colIndices, numRows, ubm);
+
+ // compress the bitmaps
+ final int numVals = ubm.getNumValues();
+ char[][] lbitmaps = new char[numVals][];
+ int totalLen = 0;
+ for( int i=0; i<numVals; i++ ) {
+ lbitmaps[i] = BitmapEncoder.genOffsetBitmap(ubm.getOffsetsList(i));
+ totalLen += lbitmaps[i].length;
+ }
+
+ // compact bitmaps to linearized representation
+ createCompressedBitmaps(numVals, totalLen, lbitmaps);
+
+
+ if( LOW_LEVEL_OPT && CREATE_SKIPLIST
+ && numRows > 2*BitmapEncoder.BITMAP_BLOCK_SZ )
+ {
+ int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ _skiplist = new int[numVals];
+ int rl = (getNumRows()/2/blksz)*blksz;
+ for (int k = 0; k < numVals; k++) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int bufIx = 0;
+ for( int i=0; i<rl && bufIx<bitmapLen; i+=blksz ) {
+ bufIx += _data[bitmapOff+bufIx] + 1;
+ }
+ _skiplist[k] = bufIx;
+ }
+ }
+ }
+
+ /**
+ * Constructor for internal use.
+ */
+ public ColGroupOLE(int[] colIndices, int numRows, double[] values, char[] bitmaps, int[] bitmapOffs) {
+ super(CompressionType.OLE_BITMAP, colIndices, numRows, values);
+ _data = bitmaps;
+ _ptr = bitmapOffs;
+ }
+
+ @Override
+ public Iterator<Integer> getDecodeIterator(int bmpIx) {
+ return new BitmapDecoderOLE(_data, _ptr[bmpIx], len(bmpIx));
+ }
+
+ @Override
+ public void decompressToBlock(MatrixBlock target)
+ {
+ if( LOW_LEVEL_OPT && getNumValues() > 1 )
+ {
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+
+ //cache blocking config and position array
+ int[] apos = new int[numVals];
+
+ //cache conscious append via horizontal scans
+ for( int bi=0; bi<n; bi+=blksz ) {
+ for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int bufIx = apos[k];
+ if( bufIx >= bitmapLen )
+ continue;
+ int len = _data[bitmapOff+bufIx];
+ int pos = bitmapOff+bufIx+1;
+ for( int i=pos; i<pos+len; i++ )
+ for( int j=0, rix = bi+_data[i]; j<numCols; j++ )
+ if( _values[off+j]!=0 )
+ target.appendValue(rix, _colIndexes[j], _values[off+j]);
+ apos[k] += len + 1;
+ }
+ }
+ }
+ else
+ {
+ //call generic decompression with decoder
+ super.decompressToBlock(target);
+ }
+ }
+
+ @Override
+ public void decompressToBlock(MatrixBlock target, int[] colixTargets)
+ {
+ if( LOW_LEVEL_OPT && getNumValues() > 1 )
+ {
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+
+ //cache blocking config and position array
+ int[] apos = new int[numVals];
+ int[] cix = new int[numCols];
+
+ //prepare target col indexes
+ for( int j=0; j<numCols; j++ )
+ cix[j] = colixTargets[_colIndexes[j]];
+
+ //cache conscious append via horizontal scans
+ for( int bi=0; bi<n; bi+=blksz ) {
+ for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int bufIx = apos[k];
+ if( bufIx >= bitmapLen )
+ continue;
+ int len = _data[bitmapOff+bufIx];
+ int pos = bitmapOff+bufIx+1;
+ for( int i=pos; i<pos+len; i++ )
+ for( int j=0, rix = bi+_data[i]; j<numCols; j++ )
+ if( _values[off+j]!=0 )
+ target.appendValue(rix, cix[j], _values[off+j]);
+ apos[k] += len + 1;
+ }
+ }
+ }
+ else
+ {
+ //call generic decompression with decoder
+ super.decompressToBlock(target, colixTargets);
+ }
+ }
+
+ @Override
+ public void decompressToBlock(MatrixBlock target, int colpos)
+ {
+ if( LOW_LEVEL_OPT && getNumValues() > 1 )
+ {
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+ double[] c = target.getDenseBlock();
+
+ //cache blocking config and position array
+ int[] apos = new int[numVals];
+
+ //cache conscious append via horizontal scans
+ for( int bi=0; bi<n; bi+=blksz ) {
+ for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int bufIx = apos[k];
+ if( bufIx >= bitmapLen )
+ continue;
+ int len = _data[bitmapOff+bufIx];
+ int pos = bitmapOff+bufIx+1;
+ for( int i=pos; i<pos+len; i++ ) {
+ c[bi+_data[i]] = _values[off+colpos];
+ }
+ apos[k] += len + 1;
+ }
+ }
+
+ target.recomputeNonZeros();
+ }
+ else
+ {
+ //call generic decompression with decoder
+ super.decompressToBlock(target, colpos);
+ }
+ }
+
+ @Override
+ public ColGroup scalarOperation(ScalarOperator op)
+ throws DMLRuntimeException
+ {
+ double val0 = op.executeScalar(0);
+
+ //fast path: sparse-safe operations
+ // Note that bitmaps don't change and are shallow-copied
+ if( op.sparseSafe || val0==0 ) {
+ return new ColGroupOLE(_colIndexes, _numRows,
+ applyScalarOp(op), _data, _ptr);
+ }
+
+ //slow path: sparse-unsafe operations (potentially create new bitmap)
+ //note: for efficiency, we currently don't drop values that become 0
+ boolean[] lind = computeZeroIndicatorVector();
+ int[] loff = computeOffsets(lind);
+ if( loff.length==0 ) { //empty offset list: go back to fast path
+ return new ColGroupOLE(_colIndexes, _numRows,
+ applyScalarOp(op), _data, _ptr);
+ }
+
+ double[] rvalues = applyScalarOp(op, val0, getNumCols());
+ char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff);
+ char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length);
+ System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
+ int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1);
+ rbitmapOffs[rbitmapOffs.length-1] = rbitmaps.length;
+
+ return new ColGroupOLE(_colIndexes, _numRows,
+ rvalues, rbitmaps, rbitmapOffs);
+ }
+
+ @Override
+ public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ double[] b = ConverterUtils.getDenseVector(vector);
+ double[] c = result.getDenseBlock();
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+
+ //prepare reduced rhs w/ relevant values
+ double[] sb = new double[numCols];
+ for (int j = 0; j < numCols; j++) {
+ sb[j] = b[_colIndexes[j]];
+ }
+
+ if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
+ {
+ //since single segment scans already exceed typical L2 cache sizes
+ //and because there is some overhead associated with blocking, the
+ //best configuration aligns with L3 cache size (x*vcores*64K*8B < L3)
+ //x=4 leads to a good yet slightly conservative compromise for single-/
+ //multi-threaded and typical number of cores and L3 cache sizes
+ final int blksz2 = ColGroupBitmap.WRITE_CACHE_BLKSZ;
+
+ //step 1: prepare position and value arrays
+
+ //current pos / values per OLs
+ int[] apos = new int[numVals];
+ double[] aval = new double[numVals];
+
+ //skip-scan to beginning for all OLs
+ if( rl > 0 ) { //rl aligned with blksz
+ int rskip = (getNumRows()/2/blksz)*blksz;
+
+ for (int k = 0; k < numVals; k++) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int start = (rl>=rskip)?rskip:0;
+ int bufIx = (rl>=rskip)?_skiplist[k]:0;
+ for( int i=start; i<rl && bufIx<bitmapLen; i+=blksz ) {
+ bufIx += _data[bitmapOff+bufIx] + 1;
+ }
+ apos[k] = bufIx;
+ }
+ }
+
+ //pre-aggregate values per OLs
+ for( int k = 0; k < numVals; k++ )
+ aval[k] = sumValues(k, sb);
+
+ //step 2: cache conscious matrix-vector via horizontal scans
+ for( int bi=rl; bi<ru; bi+=blksz2 )
+ {
+ int bimax = Math.min(bi+blksz2, ru);
+
+ //horizontal segment scan, incl pos maintenance
+ for (int k = 0; k < numVals; k++) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ double val = aval[k];
+ int bufIx = apos[k];
+
+ for( int ii=bi; ii<bimax && bufIx<bitmapLen; ii+=blksz ) {
+ //prepare length, start, and end pos
+ int len = _data[bitmapOff+bufIx];
+ int pos = bitmapOff+bufIx+1;
+
+ //compute partial results
+ //LinearAlgebraUtils.vectAdd(val, c, bitmaps, pos, ii, len);
+ for( int i=pos; i<pos+len; i++ )
+ c[ii + _data[i]] += val;
+ bufIx += len + 1;
+ }
+
+ apos[k] = bufIx;
+ }
+ }
+ }
+ else
+ {
+ //iterate over all values and their bitmaps
+ for (int k = 0; k < numVals; k++)
+ {
+ //prepare value-to-add for entire value bitmap
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ double val = sumValues(k, sb);
+
+ //iterate over bitmap blocks and add values
+ if (val != 0) {
+ int offsetBase = 0;
+ int bufIx = 0;
+ int curBlckLen;
+
+ //scan to beginning offset if necessary
+ if( rl > 0 ){
+ for (; bufIx<bitmapLen & offsetBase<rl; bufIx += curBlckLen + 1, offsetBase += blksz) {
+ curBlckLen = _data[bitmapOff+bufIx];
+ }
+ }
+
+ //compute partial results
+ for (; bufIx<bitmapLen & offsetBase<ru; bufIx += curBlckLen + 1, offsetBase += blksz) {
+ curBlckLen = _data[bitmapOff+bufIx];
+ for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) {
+ c[offsetBase + _data[bitmapOff+bufIx + blckIx]] += val;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
+ throws DMLRuntimeException
+ {
+ double[] a = ConverterUtils.getDenseVector(vector);
+ double[] c = result.getDenseBlock();
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+
+ if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
+ {
+ //cache blocking config (see matrix-vector mult for explanation)
+ final int blksz2 = ColGroupBitmap.READ_CACHE_BLKSZ;
+
+ //step 1: prepare position and value arrays
+
+ //current pos per OLs / output values
+ int[] apos = new int[numVals];
+ double[] cvals = new double[numVals];
+
+ //step 2: cache conscious matrix-vector via horizontal scans
+ for( int ai=0; ai<n; ai+=blksz2 )
+ {
+ int aimax = Math.min(ai+blksz2, n);
+
+ //horizontal segment scan, incl pos maintenance
+ for (int k = 0; k < numVals; k++) {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+ int bufIx = apos[k];
+ double vsum = 0;
+
+ for( int ii=ai; ii<aimax && bufIx<bitmapLen; ii+=blksz ) {
+ //prepare length, start, and end pos
+ int len = _data[bitmapOff+bufIx];
+ int pos = bitmapOff+bufIx+1;
+
+ //iterate over bitmap blocks and compute partial results (a[i]*1)
+ vsum += LinearAlgebraUtils.vectSum(a, _data, ii, pos, len);
+ bufIx += len + 1;
+ }
+
+ apos[k] = bufIx;
+ cvals[k] += vsum;
+ }
+ }
+
+ //step 3: scale partial results by values and write to global output
+ for (int k = 0, valOff=0; k < numVals; k++, valOff+=numCols)
+ for( int j = 0; j < numCols; j++ )
+ c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j];
+ }
+ else
+ {
+ //iterate over all values and their bitmaps
+ for (int k=0, valOff=0; k<numVals; k++, valOff+=numCols)
+ {
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+
+ //iterate over bitmap blocks and add partial results
+ double vsum = 0;
+ for (int bix=0, off=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1, off+=blksz)
+ vsum += LinearAlgebraUtils.vectSum(a, _data, off, bitmapOff+bix+1, _data[bitmapOff+bix]);
+
+ //scale partial results by values and write results
+ for( int j = 0; j < numCols; j++ )
+ c[ _colIndexes[j] ] += vsum * _values[ valOff+j ];
+ }
+ }
+ }
+
+ @Override
+ public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
+ throws DMLRuntimeException
+ {
+ KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ?
+ KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject();
+
+ if( op.indexFn instanceof ReduceAll )
+ computeSum(result, kplus);
+ else if( op.indexFn instanceof ReduceCol )
+ computeRowSums(result, kplus);
+ else if( op.indexFn instanceof ReduceRow )
+ computeColSums(result, kplus);
+ }
+
+ /**
+ *
+ * @param result
+ */
+ private void computeSum(MatrixBlock result, KahanFunction kplus)
+ {
+ KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+
+ //iterate over all values and their bitmaps
+ final int numVals = getNumValues();
+ final int numCols = getNumCols();
+
+ for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++)
+ {
+ int bitmapOff = _ptr[bitmapIx];
+ int bitmapLen = len(bitmapIx);
+ int valOff = bitmapIx * numCols;
+
+ //iterate over bitmap blocks and count partial lengths
+ int count = 0;
+ for (int bix=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1)
+ count += _data[bitmapOff+bix];
+
+ //scale counts by all values
+ for( int j = 0; j < numCols; j++ )
+ kplus.execute3(kbuff, _values[ valOff+j ], count);
+ }
+
+ result.quickSetValue(0, 0, kbuff._sum);
+ result.quickSetValue(0, 1, kbuff._correction);
+ }
+
+ /**
+ *
+ * @param result
+ */
+ private void computeRowSums(MatrixBlock result, KahanFunction kplus)
+ {
+ KahanObject kbuff = new KahanObject(0, 0);
+
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numVals = getNumValues();
+
+ //iterate over all values and their bitmaps
+ for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++)
+ {
+ //prepare value-to-add for entire value bitmap
+ int bitmapOff = _ptr[bitmapIx];
+ int bitmapLen = len(bitmapIx);
+ double val = sumValues(bitmapIx);
+
+ //iterate over bitmap blocks and add values
+ if (val != 0) {
+ int offsetBase = 0;
+ int bufIx = 0;
+ int curBlckLen;
+ for (; bufIx < bitmapLen; bufIx += curBlckLen + 1, offsetBase += blksz) {
+ curBlckLen = _data[bitmapOff+bufIx];
+ for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) {
+ int rix = offsetBase + _data[bitmapOff+bufIx + blckIx];
+ kbuff.set(result.quickGetValue(rix, 0), result.quickGetValue(rix, 1));
+ kplus.execute2(kbuff, val);
+ result.quickSetValue(rix, 0, kbuff._sum);
+ result.quickSetValue(rix, 1, kbuff._correction);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ *
+ * @param result
+ */
+ private void computeColSums(MatrixBlock result, KahanFunction kplus)
+ {
+ KahanObject kbuff = new KahanObject(0, 0);
+
+ //iterate over all values and their bitmaps
+ final int numVals = getNumValues();
+ final int numCols = getNumCols();
+ for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++)
+ {
+ int bitmapOff = _ptr[bitmapIx];
+ int bitmapLen = len(bitmapIx);
+ int valOff = bitmapIx * numCols;
+
+ //iterate over bitmap blocks and count partial lengths
+ int count = 0;
+ for (int bix=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1)
+ count += _data[bitmapOff+bix];
+
+ //scale counts by all values
+ for( int j = 0; j < numCols; j++ ) {
+ kbuff.set(result.quickGetValue(0, _colIndexes[j]),result.quickGetValue(1, _colIndexes[j]));
+ kplus.execute3(kbuff, _values[ valOff+j ], count);
+ result.quickSetValue(0, _colIndexes[j], kbuff._sum);
+ result.quickSetValue(1, _colIndexes[j], kbuff._correction);
+ }
+ }
+ }
+
+ /**
+ * Utility function of sparse-unsafe operations.
+ *
+ * @return
+ * @throws DMLRuntimeException
+ */
+ private boolean[] computeZeroIndicatorVector()
+ throws DMLRuntimeException
+ {
+ boolean[] ret = new boolean[_numRows];
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numVals = getNumValues();
+
+ //initialize everything with zero
+ Arrays.fill(ret, true);
+
+ //iterate over all values and their bitmaps
+ for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++)
+ {
+ //prepare value-to-add for entire value bitmap
+ int bitmapOff = _ptr[bitmapIx];
+ int bitmapLen = len(bitmapIx);
+
+ //iterate over bitmap blocks and add values
+ int offsetBase = 0;
+ int bufIx = 0;
+ int curBlckLen;
+ for (; bufIx < bitmapLen; bufIx += curBlckLen + 1, offsetBase += blksz) {
+ curBlckLen = _data[bitmapOff+bufIx];
+ for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) {
+ ret[offsetBase + _data[bitmapOff+bufIx + blckIx]] &= false;
+ }
+ }
+ }
+
+ return ret;
+ }
+
+ @Override
+ protected void countNonZerosPerRow(int[] rnnz)
+ {
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numVals = getNumValues();
+ final int numCols = getNumCols();
+
+ //iterate over all values and their bitmaps
+ for (int k = 0; k < numVals; k++)
+ {
+ //prepare value-to-add for entire value bitmap
+ int bitmapOff = _ptr[k];
+ int bitmapLen = len(k);
+
+ //iterate over bitmap blocks and add values
+ int offsetBase = 0;
+ int curBlckLen;
+ for (int bufIx=0; bufIx<bitmapLen; bufIx+=curBlckLen+1, offsetBase+=blksz) {
+ curBlckLen = _data[bitmapOff+bufIx];
+ for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) {
+ rnnz[offsetBase + _data[bitmapOff+bufIx + blckIx]] += numCols;
+ }
+ }
+ }
+ }
+}