You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2021/09/09 11:58:28 UTC

[systemds] branch master updated: [MINOR] Passing quantiles as function parameters in winsorize builtin. This commit also removes the parfor from logical pipelines' enumerator to stabilize the results

This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 88f1063  [MINOR] Passing quantiles as function parameters in winsorize builtin. This commit also removes the parfor from logical pipelines' enumerator to stabilize the results
88f1063 is described below

commit 88f1063f0197d7197f90ec1cb1113bfc8173b12b
Author: Shafaq Siddiqi <sh...@tugraz.at>
AuthorDate: Wed Sep 8 16:29:35 2021 +0200

    [MINOR] Passing quantiles as function parameters in winsorize builtin
      This commit also removes the parfor from logical pipelines' enumerator to stabilize the results
    
    Closes #1387.
---
 scripts/builtin/winsorize.dml                           | 17 +++++++++--------
 scripts/pipelines/properties/param.csv                  |  2 +-
 scripts/pipelines/scripts/enumerateLogical.dml          |  2 +-
 src/test/scripts/functions/builtin/multipleBuiltins.dml |  2 +-
 src/test/scripts/functions/builtin/winsorize.dml        |  2 +-
 src/test/scripts/functions/builtin/winsorizeFoo.dml     |  2 +-
 .../scripts/functions/misc/FunPotpourriMultiEval.dml    |  2 +-
 src/test/scripts/functions/misc/Functions15b.dml        |  2 +-
 8 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/scripts/builtin/winsorize.dml b/scripts/builtin/winsorize.dml
index 5e7eb61..614630d 100644
--- a/scripts/builtin/winsorize.dml
+++ b/scripts/builtin/winsorize.dml
@@ -19,18 +19,19 @@
 #
 #-------------------------------------------------------------
 
-m_winsorize = function(Matrix[Double] X, Boolean verbose) return (Matrix[Double] Y) {
+m_winsorize = function(Matrix[Double] X, Double ql = 0.05, Double qu = 0.95, Boolean verbose) 
+return (Matrix[Double] Y) {
+
   Y = matrix(0, nrow(X), ncol(X))
-  parfor(i in 1:ncol(X))
-    Y[,i] = fixOutliersWinsorize(X[,i])
+  parfor(i in 1:ncol(X), check=0) {
+    q1 = quantile(X[,i], ql)
+    q2 = quantile(X[,i], qu)
+    Y[, i] = fixOutliersWinsorize(X[,i], q1, q2)
+  }
 }
 
-fixOutliersWinsorize = function(Matrix[Double] X) return (Matrix[Double] Y)
+fixOutliersWinsorize = function(Matrix[Double] X,  Double ql, Double qu) return (Matrix[Double] Y)
 {
-  # compute quantiles for lower and upper probs
-  q = quantile(X, matrix("0.05 0.95", rows=2, cols=1));
-  ql = as.scalar(q[1,1]);
-  qu = as.scalar(q[2,1]);
   # replace values outside [ql,qu] w/ ql and qu respectively
   Y = ifelse(X < ql, ql, X);
   Y = ifelse(Y > qu, qu, Y);
diff --git a/scripts/pipelines/properties/param.csv b/scripts/pipelines/properties/param.csv
index fc454c4..e2068b1 100644
--- a/scripts/pipelines/properties/param.csv
+++ b/scripts/pipelines/properties/param.csv
@@ -1,7 +1,7 @@
 name,param_no,maskFlag,FDFlag,yFlag,verboseFlag,dataFlag,dt1,dt2,dt3,dt4,st1,en1,st2,en2,st3,en3,st4,en4
 outlierByIQR,3,0,0,0,1,0,FP,INT,INT,1,7,2,2,1,1,,,
 outlierBySd,3,0,0,0,1,0,INT,INT,INT,1,7,1,2,2,1,,,
-winsorize,0,0,0,0,1,0,,,,,,,,,,,,
+winsorize,2,0,0,0,1,0,FP,FP,0.01,0.05,0.95,1,,,,,,
 normalize,0,0,0,0,0,0,,,,,,,,,,,,
 imputeByMean,0,1,0,0,0,2,,,,,,,,,,,,
 imputeByMedian,0,1,0,0,0,2,,,,,,,,,,,,
diff --git a/scripts/pipelines/scripts/enumerateLogical.dml b/scripts/pipelines/scripts/enumerateLogical.dml
index 29ac78c..977c0a2 100644
--- a/scripts/pipelines/scripts/enumerateLogical.dml
+++ b/scripts/pipelines/scripts/enumerateLogical.dml
@@ -85,7 +85,7 @@ return (Frame[Unknown] bestLg, Double pre_best)
     # # # execute the physical pipelines
     scores = matrix(0, nrow(physicalPipList), 1)
     # TODO better parfor-dep handling of multi-assignments to avoid check=0 
-    parfor(i in 1:length(physicalPipList), check=0) {
+    for(i in 1:length(physicalPipList), check=0) {
       lp2 = as.frame(logicalPipList[((i-1)%/%num_inst)+1,])
       pp2 = as.frame(physicalPipList[i,])
       # # append configuration keys for extracting the pipeline later on
diff --git a/src/test/scripts/functions/builtin/multipleBuiltins.dml b/src/test/scripts/functions/builtin/multipleBuiltins.dml
index 7a8315b..a771f59 100644
--- a/src/test/scripts/functions/builtin/multipleBuiltins.dml
+++ b/src/test/scripts/functions/builtin/multipleBuiltins.dml
@@ -20,6 +20,6 @@
 #-------------------------------------------------------------
 
 X = read($1);
-Y = winsorize(X, FALSE);
+Y = winsorize(X=X, verbose=FALSE);
 Z = outlier(Y, FALSE);
 write(Z, $2);
diff --git a/src/test/scripts/functions/builtin/winsorize.dml b/src/test/scripts/functions/builtin/winsorize.dml
index eeba09d..00725e4 100644
--- a/src/test/scripts/functions/builtin/winsorize.dml
+++ b/src/test/scripts/functions/builtin/winsorize.dml
@@ -20,5 +20,5 @@
 #-------------------------------------------------------------
 
 X = read($1);
-Y = winsorize(X, FALSE);
+Y = winsorize(X=X, ql=0.05, qu= 0.95, verbose=FALSE);
 write(Y, $2)
diff --git a/src/test/scripts/functions/builtin/winsorizeFoo.dml b/src/test/scripts/functions/builtin/winsorizeFoo.dml
index 78472bd..9c1c53b 100644
--- a/src/test/scripts/functions/builtin/winsorizeFoo.dml
+++ b/src/test/scripts/functions/builtin/winsorizeFoo.dml
@@ -25,5 +25,5 @@ foo = function(Matrix[Double] X, Boolean verbose)
    while(FALSE){} #no inlining
    if( verbose )
      print( min(X)+" "+max(X) )
-   R = winsorize(X, verbose);
+   R = winsorize(X=X, verbose=verbose);
 }
diff --git a/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml b/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
index 72b7b7b..3d5fe37 100644
--- a/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
+++ b/src/test/scripts/functions/misc/FunPotpourriMultiEval.dml
@@ -23,7 +23,7 @@ X = rand(rows=10, cols= 10)
 t1 = interQuartileMean(X[,7]);
 
 for(i in 1:5)
-  X = eval("winsorize", list(X, FALSE))
+  X = eval("winsorize", list(X=X, ql = 0.05, qu=0.95, verbose=FALSE))
 
 t2 = interQuartileMean(X[,7]);
 print("expected=TRUE, actual="+(t2 < t1))
diff --git a/src/test/scripts/functions/misc/Functions15b.dml b/src/test/scripts/functions/misc/Functions15b.dml
index 6834f49..074c7e4 100644
--- a/src/test/scripts/functions/misc/Functions15b.dml
+++ b/src/test/scripts/functions/misc/Functions15b.dml
@@ -22,5 +22,5 @@
 foo = function(Matrix[Double] X)
   return (Matrix[Double] Y)
 {
-  Y = winsorize(X, FALSE)
+  Y = winsorize(X=X, verbose=FALSE)
 }