You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/03/29 21:41:35 UTC

[systemds] branch master updated: [MINOR] Improved robustness slicefinder builtin for model debugging

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 88fb94a  [MINOR] Improved robustness slicefinder builtin for model debugging
88fb94a is described below

commit 88fb94afe48f20b051081d7aa6b2966478487330
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Mon Mar 29 23:19:48 2021 +0200

    [MINOR] Improved robustness slicefinder builtin for model debugging
    
    This patch improves the early termination at level L if pair enumeration
    returns 0 candidate slices for evaluation by avoiding an unnecessary
    level evaluation. With specific blocksize configurations (other than 1),
    the unnecessary level evaluation even led to runtime exceptions because
    the intended indexing [1:0] of empty blocks (with zero rows or columns)
    is invalid.
---
 scripts/builtin/slicefinder.dml                    | 62 +++++++++++-----------
 .../functions/builtin/BuiltinSliceFinderTest.java  | 58 ++++++++++++--------
 .../scripts/functions/builtin/slicefinderPrep.dml  | 22 ++++++--
 3 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index a9a1dff..651fd0e 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -43,14 +43,14 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4,
     Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE)
   return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D)
 {
-	t1 = time();
-	
+  t1 = time();
+
   # init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
   D = matrix(0, 0, 5); 
-  
+
   m = nrow(X);
   n = ncol(X);
-  
+
   # prepare offset vectors and one-hot encoded X
   fdom = colMaxs(X);
   foffb = t(cumsum(t(fdom))) - fdom;
@@ -96,30 +96,32 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4,
       print(" -- generated paired slice candidates: "+nrS+" -> "+nrow(S));
     }
 
-    # extract and evaluate candidate slices
-    if( tpEval ) { # task-parallel
-      # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector 
-      R = matrix(0, nrow(S), 4)
-      parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
-        beg = (i-1)*tpBlksz + 1; 
-        end = min(i*tpBlksz, nrow(R));
-        R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha);
+    if( nrow(S) > 0 ) {
+      # extract and evaluate candidate slices
+      if( tpEval ) { # task-parallel
+        # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector 
+        R = matrix(0, nrow(S), 4)
+        parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
+          beg = (i-1)*tpBlksz + 1; 
+          end = min(i*tpBlksz, nrow(R));
+          R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha);
+        }
+      }
+      else { # data-parallel
+        R = evalSlice(X2, e, eAvg, t(S2), level, alpha);
       }
-    }
-    else { # data-parallel
-      R = evalSlice(X2, e, eAvg, t(S2), level, alpha);
-    }
-    
-    # maintain top-k after evaluation
-    [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
 
-    if(verbose) {
-      [maxsc, minsc] = analyzeTopK(TKC);
-      valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
-      print(" -- valid slices after eval: "+valid+"/"+nrow(S));
-      print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
-      print(" -- (time="+(time()-t1)+")")
-      D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
+      # maintain top-k after evaluation
+      [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
+
+      if(verbose) {
+        [maxsc, minsc] = analyzeTopK(TKC);
+        valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
+        print(" -- valid slices after eval: "+valid+"/"+nrow(S));
+        print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
+        print(" -- (time="+(time()-t1)+")")
+        D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
+      }
     }
   }
 
@@ -171,7 +173,7 @@ scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm,
 {
   # Initial upper bound equation (with minSup and ss in pos/neg terms)
   # sc = alpha * ((se/minSup) / eAvg - 1) - (1-alpha) * (n/ss - 1);
-  
+
   # Since sc is either monotonically increasing or decreasing, we
   # probe interesting points of sc in the interval [minSup, ss],
   # and compute the maximum to serve as the upper bound 
@@ -229,14 +231,14 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
   # join compatible slices (without self)
   join = S %*% t(S) == (level-2)
   I = upper.tri(target=join, diag=FALSE, values=TRUE);
-  
+
   # pair construction
   nr = nrow(I); nc = ncol(I);
   rix = matrix(I * seq(1,nr), nr*nc, 1);
   cix = matrix(I * t(seq(1,nc)), nr*nc, 1);
   rix = removeEmpty(target=rix, margin="rows");
   cix = removeEmpty(target=cix, margin="rows");
-  
+
   P = matrix(0,0,ncol(S))
   if( sum(rix)!=0 ) {
     P1 = table(seq(1,nrow(rix)), rix, nrow(rix), nrow(S));
@@ -300,7 +302,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
 
     # apply all pruning 
     fall = (fSizes & fScores & fParents);
-    
+
     # deduplication of join outputs
     Dedup = removeEmpty(target=map, margin="rows", select=fall) != 0
     #P = (Dedup %*% P) != 0, replaced by below (easier sparsity propagation)
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
index 3055a82..497215c 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java
@@ -59,82 +59,92 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
 
 	@Test
 	public void testTop4HybridDP() {
-		runSliceFinderTest(4, true, false, ExecMode.HYBRID);
+		runSliceFinderTest(4, "e", true, false, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop4SinglenodeDP() {
-		runSliceFinderTest(4, true, false, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(4, "e", true, false, ExecMode.SINGLE_NODE);
 	}
 	
 	@Test
 	public void testTop4HybridTP() {
-		runSliceFinderTest(4, false, false, ExecMode.HYBRID);
+		runSliceFinderTest(4, "e", false, false, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop4SinglenodeTP() {
-		runSliceFinderTest(4, false, false, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(4, "e", false, false, ExecMode.SINGLE_NODE);
 	}
 
 	@Test
 	public void testTop10HybridDP() {
-		runSliceFinderTest(10, true, false, ExecMode.HYBRID);
+		runSliceFinderTest(10, "e", true, false, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop10SinglenodeDP() {
-		runSliceFinderTest(10, true, false, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(10, "e", true, false, ExecMode.SINGLE_NODE);
 	}
 	
 	@Test
 	public void testTop10HybridTP() {
-		runSliceFinderTest(10, false, false, ExecMode.HYBRID);
+		runSliceFinderTest(10, "e", false, false, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop10SinglenodeTP() {
-		runSliceFinderTest(10, false, false, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(10, "e", false, false, ExecMode.SINGLE_NODE);
 	}
 
 	@Test
 	public void testTop4HybridDPSel() {
-		runSliceFinderTest(4, true, true, ExecMode.HYBRID);
+		runSliceFinderTest(4, "e", true, true, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop4SinglenodeDPSel() {
-		runSliceFinderTest(4, true, true, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(4, "e", true, true, ExecMode.SINGLE_NODE);
 	}
 	
 	@Test
 	public void testTop4HybridTPSel() {
-		runSliceFinderTest(4, false, true, ExecMode.HYBRID);
+		runSliceFinderTest(4, "e", false, true, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop4SinglenodeTPSel() {
-		runSliceFinderTest(4, false, true, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(4, "e", false, true, ExecMode.SINGLE_NODE);
 	}
 
 	@Test
 	public void testTop10HybridDPSel() {
-		runSliceFinderTest(10, true, true, ExecMode.HYBRID);
+		runSliceFinderTest(10, "e", true, true, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop10SinglenodeDPSel() {
-		runSliceFinderTest(10, true, true, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(10, "e", true, true, ExecMode.SINGLE_NODE);
 	}
 	
 	@Test
 	public void testTop10HybridTPSel() {
-		runSliceFinderTest(10, false, true, ExecMode.HYBRID);
+		runSliceFinderTest(10, "e", false, true, ExecMode.HYBRID);
 	}
 	
 	@Test
 	public void testTop10SinglenodeTPSel() {
-		runSliceFinderTest(10, false, true, ExecMode.SINGLE_NODE);
+		runSliceFinderTest(10, "e", false, true, ExecMode.SINGLE_NODE);
+	}
+	
+	@Test
+	public void testTop10HybridTPSelE2() {
+		runSliceFinderTest(10, "oe", false, true, ExecMode.HYBRID);
+	}
+	
+	@Test
+	public void testTop10SinglenodeTPSelE2() {
+		runSliceFinderTest(10, "oe", false, true, ExecMode.SINGLE_NODE);
 	}
 	
 //	@Test
@@ -142,7 +152,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
 //		runSliceFinderTest(10, false, ExecMode.SPARK);
 //	}
 	
-	private void runSliceFinderTest(int K, boolean dp, boolean selCols, ExecMode mode) {
+	private void runSliceFinderTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) {
 		ExecMode platformOld = setExecMode(mode);
 		loadTestConfiguration(getTestConfiguration(TEST_NAME));
 		String HOME = SCRIPT_DIR + TEST_DIR;
@@ -153,7 +163,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
 			
 			//run data preparation
 			fullDMLScriptName = HOME + PREP_NAME + ".dml";
-			programArgs = new String[]{"-args", data, output("X"), output("e")};
+			programArgs = new String[]{"-args", data, err, output("X"), output("e")};
 			runTest(true, false, null, -1);
 			
 			//read output and store for dml and R
@@ -180,11 +190,13 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase
 			TestUtils.compareMatrices(dmlfile, rfile, 1e-2, "Stat-DML", "Stat-R");
 			
 			//compare expected results
-			double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile);
-			if( mode != ExecMode.SPARK ) //TODO why only CP correct, but R always matches? test framework?
-				for(int i=0; i<K; i++)
-					TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
-		
+			if( err.equals("e") ) {
+				double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile);
+				if( mode != ExecMode.SPARK ) //TODO why only CP correct, but R always matches? test framework?
+					for(int i=0; i<K; i++)
+						TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
+			}
+			
 			//ensure proper inlining, despite initially multiple calls and large function
 			Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
 		}
diff --git a/src/test/scripts/functions/builtin/slicefinderPrep.dml b/src/test/scripts/functions/builtin/slicefinderPrep.dml
index 036c0ff..2eaa8ac 100644
--- a/src/test/scripts/functions/builtin/slicefinderPrep.dml
+++ b/src/test/scripts/functions/builtin/slicefinderPrep.dml
@@ -31,10 +31,24 @@ jspec= "{ ids:true, recode:[1,2,3,6],bin:["
 [X,M] = transformencode(target=F, spec=jspec);
 X = X[,2:ncol(X)]
 
+if( $2 == "oe" ) {
+  m = nrow(X)
+  n = ncol(X)
+  fdom = colMaxs(X);
+  foffb = t(cumsum(t(fdom))) - fdom;
+  foffe = t(cumsum(t(fdom)))
+  rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1)
+  cix = matrix(X + foffb, m*n, 1);
+  X2 = table(rix, cix); #one-hot encoded
+}
+else {
+  X2 = X;
+}
+
 # learn model
-B = lm(X=X, y=y, verbose=FALSE);
-yhat = X %*% B;
+B = lm(X=X2, y=y, verbose=FALSE);
+yhat = X2 %*% B;
 e = (y-yhat)^2;
 
-write(X, $2)
-write(e, $3)
+write(X, $3)
+write(e, $4)