You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2020/09/15 19:14:04 UTC

[systemds] 02/02: [SYSTEMDS-2641] Improved slicefinder builtin (hybrid tp, pruning)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 95128102e5269aaa5754283b9d2760b78a8e746c
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Tue Sep 15 21:13:13 2020 +0200

    [SYSTEMDS-2641] Improved slicefinder builtin (hybrid tp, pruning)
    
    - New hybrid task-parallel execution w/ exposed block size (2x
    performance improvement for many slices)
    - New pruning before pair enumeration for zero error
    - Slice extraction via separate permutation matrix multiplies
---
 scripts/builtin/slicefinder.dml                    | 30 +++++++++++++---------
 .../functions/builtin/BuiltinSliceFinderTest.java  |  2 +-
 src/test/scripts/functions/builtin/slicefinder.dml |  2 +-
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index 6b00b0c..731f3f9 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -26,8 +26,9 @@
 # maxL      maximum level L (conjunctions of L predicates), 0 unlimited
 # minSup    minimum support (min number of rows per slice)
 # alpha     weight [0,1]: 0 only size, 1 only error
-# dpEval    flag for data-parallel slice evaluation, 
-#           otherwise task-parallel
+# tpEval    flag for task-parallel slice evaluation, 
+#           otherwise data-parallel
+# tpBlksz   block size for task-parallel execution (num slices) 
 # verbose   flag for verbose debug output 
 # ------------------------------------------------------------
 # TK        top-k slices (k x ncol(X) if successful) 
@@ -36,7 +37,7 @@
 
 m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
     Integer k = 4, Integer maxL = 0, Integer minSup = 32, Double alpha = 0.5,
-    Boolean dpEval = FALSE, Boolean verbose = FALSE)
+    Boolean tpEval = TRUE, Integer tpBlksz = 16, Boolean verbose = FALSE)
   return(Matrix[Double] TK, Matrix[Double] TKC)
 {
   m = nrow(X);
@@ -80,15 +81,19 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
     }
 
     # extract and evaluate candidate slices
-    if( dpEval ) { #data-parallel
-      R = evalSlice(X2, e, eAvg, t(S), level, alpha);
-    }
-    else { # task-parallel
+    if( tpEval ) { # task-parallel
+      # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector 
       R = matrix(0, nrow(S), 4)
-      parfor( i in 1:nrow(S) )
-        R[i,] = evalSlice(X2, e, eAvg, t(S[i,]), level, alpha);
+      parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
+        beg = (i-1)*tpBlksz + 1; 
+        end = min(i*tpBlksz, nrow(R));
+        R[beg:end,] = evalSlice(X2, e, eAvg, t(S[beg:end,]), level, alpha);
+      }
     }
-
+    else { # data-parallel
+      R = evalSlice(X2, e, eAvg, t(S), level, alpha);
+    }
+    
     # maintain top-k after evaluation
     [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
 
@@ -199,7 +204,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
 {
   # prune invalid slices (possible without affecting overall
   # pruning effectiveness due to handling of missing parents)
-  pI = (R[,4] >= minSup);
+  pI = (R[,4] >= minSup & R[,2] > 0);
   S = removeEmpty(target=S, margin="rows", select=pI)
   R = removeEmpty(target=R, margin="rows", select=pI)
 
@@ -219,7 +224,8 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
     P1 = table(seq(1,nrow(rix)), rix, nrow(rix), nrow(S));
     P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S));
     P12 = P1 + P2; # combined slice
-    P = (P12 %*% S) != 0;
+    P = (P1 %*% S + P2 %*% S) != 0;
+
     se = min(P1 %*% R[,2], P2 %*% R[,2])
     sm = min(P1 %*% R[,3], P2 %*% R[,3])
     ss = min(P1 %*% R[,4], P2 %*% R[,4])
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
index e918f5d..53a3499 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
@@ -104,7 +104,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase {
 			//setOutputBuffering(false);
 			fullDMLScriptName = HOME + dml_test_name + ".dml";
 			programArgs = new String[]{"-args", data,
-				String.valueOf(K),String.valueOf(dp).toUpperCase(),
+				String.valueOf(K),String.valueOf(!dp).toUpperCase(),
 				String.valueOf(VERBOSE).toUpperCase(), output("R")};
 			runTest(true, false, null, -1);
 
diff --git a/src/test/scripts/functions/builtin/slicefinder.dml b/src/test/scripts/functions/builtin/slicefinder.dml
index 442ef79..96d5313 100644
--- a/src/test/scripts/functions/builtin/slicefinder.dml
+++ b/src/test/scripts/functions/builtin/slicefinder.dml
@@ -37,6 +37,6 @@ yhat = X %*% B;
 e = (y-yhat)^2;
 
 # call slice finding
-[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, dpEval=$3, verbose=$4);
+[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, tpEval=$3, verbose=$4);
 
 write(TKC, $5)