You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/04/17 01:57:24 UTC
incubator-systemml git commit: [SYSTEMML-1530] Performance compressed
tsmm (ddc decomp, alloc, result)
Repository: incubator-systemml
Updated Branches:
refs/heads/master 499428072 -> e1ae4a9f9
[SYSTEMML-1530] Performance compressed tsmm (ddc decomp, alloc, result)
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e1ae4a9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e1ae4a9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e1ae4a9f
Branch: refs/heads/master
Commit: e1ae4a9f93cad3f513ce3de8597cec29b7263205
Parents: 4994280
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Apr 16 15:54:36 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Apr 16 18:59:11 2017 -0700
----------------------------------------------------------------------
.../sysml/runtime/compress/ColGroupDDC1.java | 51 +++++++--------
.../sysml/runtime/compress/ColGroupDDC2.java | 10 +++
.../sysml/runtime/compress/ColGroupOLE.java | 57 +++++++----------
.../sysml/runtime/compress/ColGroupRLE.java | 67 +++++++++-----------
.../runtime/compress/CompressedMatrixBlock.java | 36 +++++++----
.../compress/utils/LinearAlgebraUtils.java | 25 +++-----
.../runtime/matrix/data/LibMatrixMult.java | 2 +-
7 files changed, 118 insertions(+), 130 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
index 82adb55..1ff07ac 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java
@@ -169,6 +169,16 @@ public class ColGroupDDC1 extends ColGroupDDC
}
@Override
+ public void decompressToBlock(MatrixBlock target, int colpos) {
+ int nrow = getNumRows();
+ int ncol = getNumCols();
+ double[] c = target.getDenseBlock();
+ for( int i = 0; i < nrow; i++ )
+ c[i] = _values[(_data[i]&0xFF)*ncol+colpos];
+ target.recomputeNonZeros();
+ }
+
+ @Override
protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
final int ncol = getNumCols();
final int numVals = getNumValues();
@@ -243,36 +253,19 @@ public class ColGroupDDC1 extends ColGroupDDC
final int ncol = getNumCols();
final int numVals = getNumValues();
- if( 8*numVals < getNumRows() )
- {
- //iterative over codes and pre-aggregate inputs per code (guaranteed <=255)
- //temporary array also avoids false sharing in multi-threaded environments
- double[] vals = allocDVector(numVals, true);
- for( int i=0; i<nrow; i++ ) {
- vals[_data[i]&0xFF] += a[i];
- }
-
- //post-scaling of pre-aggregate with distinct values
- for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
- double aval = vals[k];
- for( int j=0; j<ncol; j++ ) {
- int colIx = _colIndexes[j];
- c[colIx] += aval * _values[valOff+j];
- }
- }
+ //iterative over codes and pre-aggregate inputs per code (guaranteed <=255)
+ //temporary array also avoids false sharing in multi-threaded environments
+ double[] vals = allocDVector(numVals, true);
+ for( int i=0; i<nrow; i++ ) {
+ vals[_data[i]&0xFF] += a[i];
}
- else //general case
- {
- //iterate over codes, compute all, and add to the result
- for( int i=0; i<nrow; i++ ) {
- double aval = a[i];
- if( aval != 0 ) {
- int valOff = (_data[i]&0xFF) * ncol;
- for( int j=0; j<ncol; j++ ) {
- int colIx = _colIndexes[j];
- c[colIx] += aval * _values[valOff+j];
- }
- }
+
+ //post-scaling of pre-aggregate with distinct values
+ for( int k=0, valOff=0; k<numVals; k++, valOff+=ncol ) {
+ double aval = vals[k];
+ for( int j=0; j<ncol; j++ ) {
+ int colIx = _colIndexes[j];
+ c[colIx] += aval * _values[valOff+j];
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
index 66ba149..b4de6c3 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java
@@ -171,6 +171,16 @@ public class ColGroupDDC2 extends ColGroupDDC
}
@Override
+ public void decompressToBlock(MatrixBlock target, int colpos) {
+ int nrow = getNumRows();
+ int ncol = getNumCols();
+ double[] c = target.getDenseBlock();
+ for( int i = 0; i < nrow; i++ )
+ c[i] = _values[_data[i]*ncol+colpos];
+ target.recomputeNonZeros();
+ }
+
+ @Override
protected void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
final int ncol = getNumCols();
final int numVals = getNumValues();
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
index 80e53d4..bc1d421 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -201,41 +201,32 @@ public class ColGroupOLE extends ColGroupOffset
@Override
public void decompressToBlock(MatrixBlock target, int colpos)
{
- if( LOW_LEVEL_OPT && getNumValues() > 1 )
- {
- final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
- final int numCols = getNumCols();
- final int numVals = getNumValues();
- final int n = getNumRows();
- double[] c = target.getDenseBlock();
-
- //cache blocking config and position array
- int[] apos = new int[numVals];
-
- //cache conscious append via horizontal scans
- for( int bi=0; bi<n; bi+=blksz ) {
- for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
- int boff = _ptr[k];
- int blen = len(k);
- int bix = apos[k];
- if( bix >= blen )
- continue;
- int len = _data[boff+bix];
- int pos = boff+bix+1;
- for( int i=pos; i<pos+len; i++ ) {
- c[bi+_data[i]] = _values[off+colpos];
- }
- apos[k] += len + 1;
+ final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+ double[] c = target.getDenseBlock();
+
+ //cache blocking config and position array
+ int[] apos = allocIVector(numVals, true);
+
+ //cache conscious append via horizontal scans
+ for( int bi=0; bi<n; bi+=blksz ) {
+ for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
+ int boff = _ptr[k];
+ int blen = len(k);
+ int bix = apos[k];
+ if( bix >= blen )
+ continue;
+ int len = _data[boff+bix];
+ int pos = boff+bix+1;
+ for( int i=pos; i<pos+len; i++ ) {
+ c[bi+_data[i]] = _values[off+colpos];
}
- }
-
- target.recomputeNonZeros();
- }
- else
- {
- //call generic decompression with decoder
- super.decompressToBlock(target, colpos);
+ apos[k] += len + 1;
+ }
}
+ target.recomputeNonZeros();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
index 63786d0..0e50262 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
@@ -192,47 +192,38 @@ public class ColGroupRLE extends ColGroupOffset
@Override
public void decompressToBlock(MatrixBlock target, int colpos)
{
- if( LOW_LEVEL_OPT && getNumValues() > 1 )
- {
- final int blksz = 128 * 1024;
- final int numCols = getNumCols();
- final int numVals = getNumValues();
- final int n = getNumRows();
- double[] c = target.getDenseBlock();
-
- //position and start offset arrays
- int[] apos = new int[numVals];
- int[] astart = new int[numVals];
-
- //cache conscious append via horizontal scans
- for( int bi=0; bi<n; bi+=blksz ) {
- int bimax = Math.min(bi+blksz, n);
- for (int k=0, off=0; k < numVals; k++, off+=numCols) {
- int boff = _ptr[k];
- int blen = len(k);
- int bix = apos[k];
- if( bix >= blen )
- continue;
- int start = astart[k];
- for( ; bix<blen & start<bimax; bix+=2) {
- start += _data[boff + bix];
- int len = _data[boff + bix+1];
- for( int i=start; i<start+len; i++ )
- c[i] = _values[off+colpos];
- start += len;
- }
- apos[k] = bix;
- astart[k] = start;
+ final int blksz = 128 * 1024;
+ final int numCols = getNumCols();
+ final int numVals = getNumValues();
+ final int n = getNumRows();
+ double[] c = target.getDenseBlock();
+
+ //position and start offset arrays
+ int[] astart = new int[numVals];
+ int[] apos = allocIVector(numVals, true);
+
+ //cache conscious append via horizontal scans
+ for( int bi=0; bi<n; bi+=blksz ) {
+ int bimax = Math.min(bi+blksz, n);
+ for (int k=0, off=0; k < numVals; k++, off+=numCols) {
+ int boff = _ptr[k];
+ int blen = len(k);
+ int bix = apos[k];
+ if( bix >= blen )
+ continue;
+ int start = astart[k];
+ for( ; bix<blen & start<bimax; bix+=2) {
+ start += _data[boff + bix];
+ int len = _data[boff + bix+1];
+ for( int i=start; i<start+len; i++ )
+ c[i] = _values[off+colpos];
+ start += len;
}
+ apos[k] = bix;
+ astart[k] = start;
}
-
- target.recomputeNonZeros();
- }
- else
- {
- //call generic decompression with decoder
- super.decompressToBlock(target, colpos);
}
+ target.recomputeNonZeros();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index ead0b19..d7c66f1 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -102,7 +102,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
public static final boolean TRANSPOSE_INPUT = true;
public static final boolean MATERIALIZE_ZEROS = false;
public static final long MIN_PAR_AGG_THRESHOLD = 16*1024*1024; //16MB
- public static final boolean INVESTIGATE_ESTIMATES = false;
+ public static boolean INVESTIGATE_ESTIMATES = false;
public static boolean ALLOW_DDC_ENCODING = true;
private static final boolean LDEBUG = true; //local debug flag
private static final Level LDEBUG_LEVEL = Level.DEBUG; //DEBUG/TRACE for details
@@ -912,7 +912,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
BinaryOperator bop = new BinaryOperator(Multiply.getMultiplyFnObject());
LibMatrixBincell.bincellOpInPlace(tmp, w, bop);
}
- leftMultByVectorTranspose(_colGroups, tmp, out, true);
+ leftMultByVectorTranspose(_colGroups, tmp, out, true, true);
//System.out.println("Compressed MMChain in "+time.stop());
@@ -1003,7 +1003,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
if( op.getNumThreads()>1 )
leftMultByVectorTranspose(_colGroups, mb, ret, false, op.getNumThreads());
else
- leftMultByVectorTranspose(_colGroups, mb, ret, false);
+ leftMultByVectorTranspose(_colGroups, mb, ret, false, true);
}
else {
//NOTE: we could decompress and invoke super.aggregateBinary but for now
@@ -1221,6 +1221,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
leftMultByTransposeSelf(_colGroups, out, 0, _colGroups.size());
// post-processing
+ LinearAlgebraUtils.copyUpperToLowerTriangle(out);
out.recomputeNonZeros();
}
@@ -1278,6 +1279,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
}
// post-processing
+ LinearAlgebraUtils.copyUpperToLowerTriangle(out);
out.recomputeNonZeros();
}
@@ -1402,7 +1404,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
* @param doTranspose if true, transpose vector
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
- private static void leftMultByVectorTranspose(List<ColGroup> colGroups, MatrixBlock vector, MatrixBlock result, boolean doTranspose)
+ private static void leftMultByVectorTranspose(List<ColGroup> colGroups, MatrixBlock vector, MatrixBlock result, boolean doTranspose, boolean allocTmp)
throws DMLRuntimeException
{
//transpose vector if required
@@ -1417,7 +1419,8 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
result.allocateDenseBlock();
// setup memory pool for reuse
- ColGroupValue.setupThreadLocalMemory(getMaxNumValues(colGroups));
+ if( allocTmp )
+ ColGroupValue.setupThreadLocalMemory(getMaxNumValues(colGroups));
// delegate matrix-vector operation to each column group
for (ColGroup grp : colGroups) {
@@ -1425,7 +1428,8 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
}
// post-processing
- ColGroupValue.cleanupThreadLocalMemory();
+ if( allocTmp )
+ ColGroupValue.cleanupThreadLocalMemory();
result.recomputeNonZeros();
}
@@ -1488,9 +1492,14 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
final int numRows = groups.get(0).getNumRows();
final int numGroups = groups.size();
- //preallocated dense matrix block
- MatrixBlock lhs = new MatrixBlock(numRows, 1, false);
+ //preallocated dense tmp matrix blocks
+ MatrixBlock lhs = new MatrixBlock(1, numRows, false);
+ MatrixBlock tmpret = new MatrixBlock(1, result.getNumColumns(), false);
lhs.allocateDenseBlock();
+ tmpret.allocateDenseBlock();
+
+ // setup memory pool for reuse
+ ColGroupValue.setupThreadLocalMemory(getMaxNumValues(groups));
//approach: for each colgroup, extract uncompressed columns one at-a-time
//vector-matrix multiplies against remaining col groups
@@ -1504,19 +1513,22 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
//for all uncompressed lhs columns vectors
for( int j=0; j<ixgroup.length; j++ ) {
//decompress single column
- lhs.reset(numRows, 1, false);
+ if( !(group instanceof ColGroupDDC) )
+ lhs.reset(1, numRows, false);
group.decompressToBlock(lhs, j);
if( !lhs.isEmptyBlock(false) ) {
//compute vector-matrix partial result
- MatrixBlock tmpret = new MatrixBlock(1,result.getNumColumns(),false);
- leftMultByVectorTranspose(tmpList, lhs, tmpret, true);
+ leftMultByVectorTranspose(tmpList, lhs, tmpret, false, false);
//write partial results (disjoint non-zeros)
- LinearAlgebraUtils.copyNonZerosToRowCol(result, tmpret, ixgroup[j]);
+ LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[j]);
}
}
}
+
+ //post processing
+ ColGroupValue.cleanupThreadLocalMemory();
}
@SuppressWarnings("unchecked")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
index 4cc86ed..515457b 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
@@ -147,25 +147,16 @@ public class LinearAlgebraUtils
return val;
}
- public static void copyUpperToLowerTriangle( MatrixBlock ret )
- {
- double[] c = ret.getDenseBlock();
- final int m = ret.getNumRows();
- final int n = ret.getNumColumns();
-
- //copy symmetric values
- for( int i=0, uix=0; i<m; i++, uix+=n )
- for( int j=i+1, lix=j*n+i; j<n; j++, lix+=n )
- c[ lix ] = c[ uix+j ];
+ public static void copyUpperToLowerTriangle( MatrixBlock ret ) {
+ LibMatrixMult.copyUpperToLowerTriangle(ret);
}
-
- public static void copyNonZerosToRowCol( MatrixBlock ret, MatrixBlock tmp, int ix )
- {
+
+ public static void copyNonZerosToUpperTriangle( MatrixBlock ret, MatrixBlock tmp, int ix ) {
+ double[] a = tmp.getDenseBlock();
for(int i=0; i<tmp.getNumColumns(); i++) {
- double val = tmp.quickGetValue(0, i);
- if( val != 0 ) {
- ret.setValueDenseUnsafe(ix, i, val);
- ret.setValueDenseUnsafe(i, ix, val);
+ if( a[i] != 0 ) {
+ ret.setValueDenseUnsafe(
+ (ix<i)?ix:i, (ix<i)?i:ix, a[i]);
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e1ae4a9f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 65f3be1..d746aa3 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -3414,7 +3414,7 @@ public class LibMatrixMult
*
* @param ret matrix
*/
- private static void copyUpperToLowerTriangle( MatrixBlock ret )
+ public static void copyUpperToLowerTriangle( MatrixBlock ret )
{
double[] c = ret.denseBlock;
final int m = ret.rlen;