You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2020/09/15 19:14:04 UTC
[systemds] 02/02: [SYSTEMDS-2641] Improved slicefinder builtin
(hybrid tp, pruning)
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 95128102e5269aaa5754283b9d2760b78a8e746c
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Tue Sep 15 21:13:13 2020 +0200
[SYSTEMDS-2641] Improved slicefinder builtin (hybrid tp, pruning)
- New hybrid task-parallel execution w/ exposed block size (2x
performance improvement for many slices)
- New pruning before pair enumeration for zero error
- Slice extraction via separate permutation matrix multiplies
---
scripts/builtin/slicefinder.dml | 30 +++++++++++++---------
.../functions/builtin/BuiltinSliceFinderTest.java | 2 +-
src/test/scripts/functions/builtin/slicefinder.dml | 2 +-
3 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index 6b00b0c..731f3f9 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -26,8 +26,9 @@
# maxL maximum level L (conjunctions of L predicates), 0 unlimited
# minSup minimum support (min number of rows per slice)
# alpha weight [0,1]: 0 only size, 1 only error
-# dpEval flag for data-parallel slice evaluation,
-# otherwise task-parallel
+# tpEval flag for task-parallel slice evaluation,
+# otherwise data-parallel
+# tpBlksz block size for task-parallel execution (num slices)
# verbose flag for verbose debug output
# ------------------------------------------------------------
# TK top-k slices (k x ncol(X) if successful)
@@ -36,7 +37,7 @@
m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
Integer k = 4, Integer maxL = 0, Integer minSup = 32, Double alpha = 0.5,
- Boolean dpEval = FALSE, Boolean verbose = FALSE)
+ Boolean tpEval = TRUE, Integer tpBlksz = 16, Boolean verbose = FALSE)
return(Matrix[Double] TK, Matrix[Double] TKC)
{
m = nrow(X);
@@ -80,15 +81,19 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
}
# extract and evaluate candidate slices
- if( dpEval ) { #data-parallel
- R = evalSlice(X2, e, eAvg, t(S), level, alpha);
- }
- else { # task-parallel
+ if( tpEval ) { # task-parallel
+ # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector
R = matrix(0, nrow(S), 4)
- parfor( i in 1:nrow(S) )
- R[i,] = evalSlice(X2, e, eAvg, t(S[i,]), level, alpha);
+ parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
+ beg = (i-1)*tpBlksz + 1;
+ end = min(i*tpBlksz, nrow(R));
+ R[beg:end,] = evalSlice(X2, e, eAvg, t(S[beg:end,]), level, alpha);
+ }
}
-
+ else { # data-parallel
+ R = evalSlice(X2, e, eAvg, t(S), level, alpha);
+ }
+
# maintain top-k after evaluation
[TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
@@ -199,7 +204,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
{
# prune invalid slices (possible without affecting overall
# pruning effectiveness due to handling of missing parents)
- pI = (R[,4] >= minSup);
+ pI = (R[,4] >= minSup & R[,2] > 0);
S = removeEmpty(target=S, margin="rows", select=pI)
R = removeEmpty(target=R, margin="rows", select=pI)
@@ -219,7 +224,8 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
P1 = table(seq(1,nrow(rix)), rix, nrow(rix), nrow(S));
P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S));
P12 = P1 + P2; # combined slice
- P = (P12 %*% S) != 0;
+ P = (P1 %*% S + P2 %*% S) != 0;
+
se = min(P1 %*% R[,2], P2 %*% R[,2])
sm = min(P1 %*% R[,3], P2 %*% R[,3])
ss = min(P1 %*% R[,4], P2 %*% R[,4])
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
index e918f5d..53a3499 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
@@ -104,7 +104,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase {
//setOutputBuffering(false);
fullDMLScriptName = HOME + dml_test_name + ".dml";
programArgs = new String[]{"-args", data,
- String.valueOf(K),String.valueOf(dp).toUpperCase(),
+ String.valueOf(K),String.valueOf(!dp).toUpperCase(),
String.valueOf(VERBOSE).toUpperCase(), output("R")};
runTest(true, false, null, -1);
diff --git a/src/test/scripts/functions/builtin/slicefinder.dml b/src/test/scripts/functions/builtin/slicefinder.dml
index 442ef79..96d5313 100644
--- a/src/test/scripts/functions/builtin/slicefinder.dml
+++ b/src/test/scripts/functions/builtin/slicefinder.dml
@@ -37,6 +37,6 @@ yhat = X %*% B;
e = (y-yhat)^2;
# call slice finding
-[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, dpEval=$3, verbose=$4);
+[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, tpEval=$3, verbose=$4);
write(TKC, $5)