You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2020/09/17 19:04:11 UTC
[systemds] branch master updated: [SYSTEMDS-2461] Additional debug output slice finding algorithm
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 651d59f [SYSTEMDS-2461] Additional debug output slice finding algorithm
651d59f is described below
commit 651d59f02a578f521408f0c40d99a1b66e004f56
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Thu Sep 17 21:00:26 2020 +0200
[SYSTEMDS-2461] Additional debug output slice finding algorithm
This patch collects the number of enumerated slices and the top-k
characteristics per level into a debug matrix and returns it to the
user. Furthermore, it includes a minor additional pruning extension
(by min error, i.e., basic slices with zero error are dropped).
---
scripts/builtin/slicefinder.dml | 14 ++++++++++----
.../fed/MultiReturnParameterizedBuiltinFEDInstruction.java | 1 -
.../fed/ParameterizedBuiltinFEDInstruction.java | 1 -
3 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml
index 731f3f9..5a20fb5 100644
--- a/scripts/builtin/slicefinder.dml
+++ b/scripts/builtin/slicefinder.dml
@@ -33,13 +33,17 @@
# ------------------------------------------------------------
# TK top-k slices (k x ncol(X) if successful)
# TKC score, size, error of slices (k x 3)
+# D debug matrix, populated with enumeration stats if verbose
# ------------------------------------------------------------
m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
Integer k = 4, Integer maxL = 0, Integer minSup = 32, Double alpha = 0.5,
Boolean tpEval = TRUE, Integer tpBlksz = 16, Boolean verbose = FALSE)
- return(Matrix[Double] TK, Matrix[Double] TKC)
+ return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D)
{
+ # init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
+ D = matrix(0, 0, 5);
+
m = nrow(X);
n = ncol(X);
@@ -62,6 +66,7 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
if( verbose ) {
[maxsc, minsc] = analyzeTopK(TKC);
print("SliceFinder: initial top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc)
+ D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc, minsc))));
}
# lattice enumeration w/ size/error pruning, one iteration per level
@@ -99,9 +104,10 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e,
if(verbose) {
[maxsc, minsc] = analyzeTopK(TKC);
- valid = as.integer(sum(R[,4]>=minSup));
+ valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
print(" -- valid slices after eval: "+valid+"/"+nrow(S));
print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
+ D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
}
}
@@ -123,12 +129,12 @@ createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e,
merr = t(colMaxs(X2 * e)); # maximum error vector
if( verbose ) {
- drop = as.integer(sum(cCnts < minSup));
+ drop = as.integer(sum(cCnts < minSup | err == 0));
print("SliceFinder: dropping "+drop+"/"+n2+" features below minSup = "+minSup+".");
}
# working set of active slices (#attr x #slices) and top k
- selCols = (cCnts >= minSup);
+ selCols = (cCnts >= minSup & err > 0);
attr = removeEmpty(target=seq(1,n2), margin="rows", select=selCols);
ss = removeEmpty(target=cCnts, margin="rows", select=selCols);
se = removeEmpty(target=err, margin="rows", select=selCols);
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
index e1ecb19..05c1901 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/fed/MultiReturnParameterizedBuiltinFEDInstruction.java
@@ -41,7 +41,6 @@ import org.apache.sysds.runtime.instructions.cp.Data;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.operators.Operator;
-import org.apache.sysds.runtime.privacy.PrivacyMonitor;
import org.apache.sysds.runtime.transform.encode.Encoder;
import org.apache.sysds.runtime.transform.encode.EncoderBin;
import org.apache.sysds.runtime.transform.encode.EncoderComposite;
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
index 762ed68..f549dca 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/fed/ParameterizedBuiltinFEDInstruction.java
@@ -51,7 +51,6 @@ import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.SimpleOperator;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
import org.apache.sysds.runtime.meta.MetaDataFormat;
-import org.apache.sysds.runtime.privacy.PrivacyMonitor;
import org.apache.sysds.runtime.transform.decode.Decoder;
import org.apache.sysds.runtime.transform.decode.DecoderFactory;
import org.apache.sysds.runtime.transform.encode.Encoder;