You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/03/29 21:41:35 UTC
[systemds] branch master updated: [MINOR] Improved robustness
slicefinder builtin for model debugging
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 88fb94a [MINOR] Improved robustness slicefinder builtin for model debugging
88fb94a is described below
commit 88fb94afe48f20b051081d7aa6b2966478487330
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Mon Mar 29 23:19:48 2021 +0200
[MINOR] Improved robustness slicefinder builtin for model debugging
This patch improves the early termination at level L if pair enumeration
returns 0 candidate slices for evaluation by avoiding an unnecessary
level evaluation. With specific blocksize configurations (other than 1),
the unnecessary level evaluation even led to runtime exceptions because
the intended indexing [1:0] of empty blocks (with zero rows or columns)
is invalid.
---
scripts/builtin/slicefinder.dml | 62 +++++++++++-----------
.../functions/builtin/BuiltinSliceFinderTest.java | 58 ++++++++++++--------
.../scripts/functions/builtin/slicefinderPrep.dml | 22 ++++++--
3 files changed, 85 insertions(+), 57 deletions(-)
diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index a9a1dff..651fd0e 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -43,14 +43,14 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4,
Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE)
return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D)
{
- t1 = time();
-
+ t1 = time();
+
# init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
D = matrix(0, 0, 5);
-
+
m = nrow(X);
n = ncol(X);
-
+
# prepare offset vectors and one-hot encoded X
fdom = colMaxs(X);
foffb = t(cumsum(t(fdom))) - fdom;
@@ -96,30 +96,32 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4,
print(" -- generated paired slice candidates: "+nrS+" -> "+nrow(S));
}
- # extract and evaluate candidate slices
- if( tpEval ) { # task-parallel
- # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector
- R = matrix(0, nrow(S), 4)
- parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
- beg = (i-1)*tpBlksz + 1;
- end = min(i*tpBlksz, nrow(R));
- R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha);
+ if( nrow(S) > 0 ) {
+ # extract and evaluate candidate slices
+ if( tpEval ) { # task-parallel
+ # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector
+ R = matrix(0, nrow(S), 4)
+ parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
+ beg = (i-1)*tpBlksz + 1;
+ end = min(i*tpBlksz, nrow(R));
+ R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha);
+ }
+ }
+ else { # data-parallel
+ R = evalSlice(X2, e, eAvg, t(S2), level, alpha);
}
- }
- else { # data-parallel
- R = evalSlice(X2, e, eAvg, t(S2), level, alpha);
- }
-
- # maintain top-k after evaluation
- [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
- if(verbose) {
- [maxsc, minsc] = analyzeTopK(TKC);
- valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
- print(" -- valid slices after eval: "+valid+"/"+nrow(S));
- print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
- print(" -- (time="+(time()-t1)+")")
- D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
+ # maintain top-k after evaluation
+ [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
+
+ if(verbose) {
+ [maxsc, minsc] = analyzeTopK(TKC);
+ valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
+ print(" -- valid slices after eval: "+valid+"/"+nrow(S));
+ print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
+ print(" -- (time="+(time()-t1)+")")
+ D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
+ }
}
}
@@ -171,7 +173,7 @@ scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm,
{
# Initial upper bound equation (with minSup and ss in pos/neg terms)
# sc = alpha * ((se/minSup) / eAvg - 1) - (1-alpha) * (n/ss - 1);
-
+
# Since sc is either monotonically increasing or decreasing, we
# probe interesting points of sc in the interval [minSup, ss],
# and compute the maximum to serve as the upper bound
@@ -229,14 +231,14 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
# join compatible slices (without self)
join = S %*% t(S) == (level-2)
I = upper.tri(target=join, diag=FALSE, values=TRUE);
-
+
# pair construction
nr = nrow(I); nc = ncol(I);
rix = matrix(I * seq(1,nr), nr*nc, 1);
cix = matrix(I * t(seq(1,nc)), nr*nc, 1);
rix = removeEmpty(target=rix, margin="rows");
cix = removeEmpty(target=cix, margin="rows");
-
+
P = matrix(0,0,ncol(S))
if( sum(rix)!=0 ) {
P1 = table(seq(1,nrow(rix)), rix, nrow(rix), nrow(S));
@@ -300,7 +302,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
# apply all pruning
fall = (fSizes & fScores & fParents);
-
+
# deduplication of join outputs
Dedup = removeEmpty(target=map, margin="rows", select=fall) != 0
#P = (Dedup %*% P) != 0, replaced by below (easier sparsity propagation)
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
index 3055a82..497215c 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
@@ -59,82 +59,92 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
@Test
public void testTop4HybridDP() {
- runSliceFinderTest(4, true, false, ExecMode.HYBRID);
+ runSliceFinderTest(4, "e", true, false, ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDP() {
- runSliceFinderTest(4, true, false, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(4, "e", true, false, ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTP() {
- runSliceFinderTest(4, false, false, ExecMode.HYBRID);
+ runSliceFinderTest(4, "e", false, false, ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTP() {
- runSliceFinderTest(4, false, false, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(4, "e", false, false, ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDP() {
- runSliceFinderTest(10, true, false, ExecMode.HYBRID);
+ runSliceFinderTest(10, "e", true, false, ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDP() {
- runSliceFinderTest(10, true, false, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(10, "e", true, false, ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTP() {
- runSliceFinderTest(10, false, false, ExecMode.HYBRID);
+ runSliceFinderTest(10, "e", false, false, ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTP() {
- runSliceFinderTest(10, false, false, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(10, "e", false, false, ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPSel() {
- runSliceFinderTest(4, true, true, ExecMode.HYBRID);
+ runSliceFinderTest(4, "e", true, true, ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPSel() {
- runSliceFinderTest(4, true, true, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(4, "e", true, true, ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPSel() {
- runSliceFinderTest(4, false, true, ExecMode.HYBRID);
+ runSliceFinderTest(4, "e", false, true, ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPSel() {
- runSliceFinderTest(4, false, true, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(4, "e", false, true, ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPSel() {
- runSliceFinderTest(10, true, true, ExecMode.HYBRID);
+ runSliceFinderTest(10, "e", true, true, ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPSel() {
- runSliceFinderTest(10, true, true, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(10, "e", true, true, ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSel() {
- runSliceFinderTest(10, false, true, ExecMode.HYBRID);
+ runSliceFinderTest(10, "e", false, true, ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSel() {
- runSliceFinderTest(10, false, true, ExecMode.SINGLE_NODE);
+ runSliceFinderTest(10, "e", false, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2() {
+ runSliceFinderTest(10, "oe", false, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2() {
+ runSliceFinderTest(10, "oe", false, true, ExecMode.SINGLE_NODE);
}
// @Test
@@ -142,7 +152,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
// runSliceFinderTest(10, false, ExecMode.SPARK);
// }
- private void runSliceFinderTest(int K, boolean dp, boolean selCols, ExecMode mode) {
+ private void runSliceFinderTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) {
ExecMode platformOld = setExecMode(mode);
loadTestConfiguration(getTestConfiguration(TEST_NAME));
String HOME = SCRIPT_DIR + TEST_DIR;
@@ -153,7 +163,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
//run data preparation
fullDMLScriptName = HOME + PREP_NAME + ".dml";
- programArgs = new String[]{"-args", data, output("X"), output("e")};
+ programArgs = new String[]{"-args", data, err, output("X"), output("e")};
runTest(true, false, null, -1);
//read output and store for dml and R
@@ -180,11 +190,13 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
TestUtils.compareMatrices(dmlfile, rfile, 1e-2, "Stat-DML", "Stat-R");
//compare expected results
- double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile);
- if( mode != ExecMode.SPARK ) //TODO why only CP correct, but R always matches? test framework?
- for(int i=0; i<K; i++)
- TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
-
+ if( err.equals("e") ) {
+ double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile);
+ if( mode != ExecMode.SPARK ) //TODO why only CP correct, but R always matches? test framework?
+ for(int i=0; i<K; i++)
+ TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
+ }
+
//ensure proper inlining, despite initially multiple calls and large function
Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
}
diff --git a/src/test/scripts/functions/builtin/slicefinderPrep.dml b/src/test/scripts/functions/builtin/slicefinderPrep.dml
index 036c0ff..2eaa8ac 100644
--- a/src/test/scripts/functions/builtin/slicefinderPrep.dml
+++ b/src/test/scripts/functions/builtin/slicefinderPrep.dml
@@ -31,10 +31,24 @@ jspec= "{ ids:true, recode:[1,2,3,6],bin:["
[X,M] = transformencode(target=F, spec=jspec);
X = X[,2:ncol(X)]
+if( $2 == "oe" ) {
+ m = nrow(X)
+ n = ncol(X)
+ fdom = colMaxs(X);
+ foffb = t(cumsum(t(fdom))) - fdom;
+ foffe = t(cumsum(t(fdom)))
+ rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1)
+ cix = matrix(X + foffb, m*n, 1);
+ X2 = table(rix, cix); #one-hot encoded
+}
+else {
+ X2 = X;
+}
+
# learn model
-B = lm(X=X, y=y, verbose=FALSE);
-yhat = X %*% B;
+B = lm(X=X2, y=y, verbose=FALSE);
+yhat = X2 %*% B;
e = (y-yhat)^2;
-write(X, $2)
-write(e, $3)
+write(X, $3)
+write(e, $4)