You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2020/09/17 19:04:11 UTC

[systemds] branch master updated: [SYSTEMDS-2461] Additional debug output slice finding algorithm

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 651d59f  [SYSTEMDS-2461] Additional debug output slice finding algorithm
651d59f is described below

commit 651d59f02a578f521408f0c40d99a1b66e004f56
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Thu Sep 17 21:00:26 2020 +0200

    [SYSTEMDS-2461] Additional debug output slice finding algorithm
    
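    This patch collects the number of enumerated slices and the top-k
    characteristics per level into a debug matrix, which is populated
    if verbose is set and returned to the user as an additional output.
    Furthermore, it includes a minor additional pruning extension (by
    min error) that keeps only basic slices with positive error, and
    removes the PrivacyMonitor import from two federated instruction
    classes.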
---
 scripts/builtin/slicefinder.dml                            | 14 ++++++++++----
 .../fed/MultiReturnParameterizedBuiltinFEDInstruction.java |  1 -
 .../fed/ParameterizedBuiltinFEDInstruction.java            |  1 -
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index 731f3f9..5a20fb5 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -33,13 +33,17 @@
 # ------------------------------------------------------------
 # TK        top-k slices (k x ncol(X) if successful) 
 # TKC       score, size, error of slices (k x 3)
+# D         debug matrix, populated with enumeration stats if verbose
 # ------------------------------------------------------------
 
 m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
     Integer k = 4, Integer maxL = 0, Integer minSup = 32, Double alpha = 0.5,
     Boolean tpEval = TRUE, Integer tpBlksz = 16, Boolean verbose = FALSE)
-  return(Matrix[Double] TK, Matrix[Double] TKC)
+  return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D)
 {
+  # init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
+  D = matrix(0, 0, 5); 
+  
   m = nrow(X);
   n = ncol(X);
   
@@ -62,6 +66,7 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
   if( verbose ) {
     [maxsc, minsc] = analyzeTopK(TKC);
     print("SliceFinder: initial top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc)
+    D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc, minsc))));
   }
 
   # lattice enumeration w/ size/error pruning, one iteration per level
@@ -99,9 +104,10 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
 
     if(verbose) {
       [maxsc, minsc] = analyzeTopK(TKC);
-      valid = as.integer(sum(R[,4]>=minSup));
+      valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
       print(" -- valid slices after eval: "+valid+"/"+nrow(S));
       print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
+      D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
     }
   }
 
@@ -123,12 +129,12 @@ createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e,
   merr = t(colMaxs(X2 * e)); # maximum error vector
 
   if( verbose ) {
-    drop = as.integer(sum(cCnts < minSup));
+    drop = as.integer(sum(cCnts < minSup | err == 0));
     print("SliceFinder: dropping "+drop+"/"+n2+" features below minSup = "+minSup+".");
   }
 
   # working set of active slices (#attr x #slices) and top k
-  selCols = (cCnts >= minSup);
+  selCols = (cCnts >= minSup & err > 0);
   attr = removeEmpty(target=seq(1,n2), margin="rows", select=selCols);
   ss = removeEmpty(target=cCnts, margin="rows", select=selCols);
   se = removeEmpty(target=err, margin="rows", select=selCols);
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
index e1ecb19..05c1901 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
@@ -41,7 +41,6 @@ import org.apache.sysds.runtime.instructions.cp.Data;
 import org.apache.sysds.runtime.matrix.data.FrameBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.Operator;
-import org.apache.sysds.runtime.privacy.PrivacyMonitor;
 import org.apache.sysds.runtime.transform.encode.Encoder;
 import org.apache.sysds.runtime.transform.encode.EncoderBin;
 import org.apache.sysds.runtime.transform.encode.EncoderComposite;
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
index 762ed68..f549dca 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
@@ -51,7 +51,6 @@ import org.apache.sysds.runtime.matrix.operators.Operator;
 import org.apache.sysds.runtime.matrix.operators.SimpleOperator;
 import org.apache.sysds.runtime.meta.MatrixCharacteristics;
 import org.apache.sysds.runtime.meta.MetaDataFormat;
-import org.apache.sysds.runtime.privacy.PrivacyMonitor;
 import org.apache.sysds.runtime.transform.decode.Decoder;
 import org.apache.sysds.runtime.transform.decode.DecoderFactory;
 import org.apache.sysds.runtime.transform.encode.Encoder;