You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2021/07/05 16:12:37 UTC

[systemds] branch master updated: [MINOR] cleanups in cleaning tests

This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 56eb216  [MINOR] cleanups in cleaning tests
56eb216 is described below

commit 56eb216420194533bc205b0daa913dc12320f0f6
Author: Shafaq Siddiqi <sh...@tugraz.at>
AuthorDate: Mon Jul 5 18:12:10 2021 +0200

    [MINOR] cleanups in cleaning tests
---
 scripts/builtin/bandit.dml                                         | 4 ++--
 .../sysds/test/functions/pipelines/CleaningTestClassification.java | 3 +--
 src/test/scripts/functions/pipelines/intermediates/hyperparams.csv | 7 ++-----
 src/test/scripts/functions/pipelines/intermediates/logical.csv     | 2 +-
 src/test/scripts/functions/pipelines/intermediates/pipelines.csv   | 7 ++-----
 src/test/scripts/functions/pipelines/testClassification.dml        | 5 +++--
 src/test/scripts/functions/pipelines/testLogical.dml               | 2 +-
 7 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 5c24dab..8770cdc 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -95,7 +95,7 @@ m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, List[Unknown
 
       # sort the configurations for successive halving
       avergae_perf =  getMaxPerConf(outPip, nrow(configurations)) 
-      configurations = frameSort(cbind(avergae_perf, configurations))
+      configurations = frameSort(cbind(avergae_perf, configurations), TRUE)
       configurations = configurations[, 2:ncol(configurations)]
     }
     bracket_pipel = removeEmpty(target=bracket_pipel, margin="rows")
@@ -405,7 +405,7 @@ extractTopK = function(Frame[Unknown] pipeline, Matrix[Double] hyperparam,
 
   # sort results
   hyperparam = order(target = hyperparam, by = 1, decreasing=TRUE, index.return=FALSE)
-  pipeline = frameSort(pipeline)
+  pipeline = frameSort(pipeline, TRUE)
 
 
   # remove the row with accuracy less than test accuracy 
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java b/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
index 8144d89..9043fdf 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
@@ -40,7 +40,6 @@ public class CleaningTestClassification extends AutomatedTestBase {
 	private final static String CLEAN = DATA_DIR+ "clean.csv";
 	private final static String META = RESOURCE+ "meta/meta_census.csv";
 	private final static String OUTPUT = RESOURCE+"intermediates/";
-	private final static String LOGICAL = RESOURCE+"intermediates/logical.csv";
 
 	private static final String PARAM_DIR = "./scripts/pipelines/properties/";
 	private final static String PARAM = PARAM_DIR + "param.csv";
@@ -79,7 +78,7 @@ public class CleaningTestClassification extends AutomatedTestBase {
 			loadTestConfiguration(getTestConfiguration(TEST_NAME1));
 			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
 			programArgs = new String[] {"-stats", "-exec", "singlenode", "-nvargs", "dirtyData="+DIRTY,
-				"metaData="+META, "primitives="+PRIMITIVES, "parameters="+PARAM, "logical="+LOGICAL,
+				"metaData="+META, "primitives="+PRIMITIVES, "parameters="+PARAM,
 				"sampleSize="+ sample, "topk="+ topk, "rv="+ resources, "cv="+ crossfold,
 				"weighted="+ weightedAccuracy, "output="+OUTPUT, "target="+target, "cleanData="+CLEAN,
 				"O="+output("O")};
diff --git a/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv b/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
index 2e19fb1..a4729a4 100644
--- a/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
@@ -1,5 +1,2 @@
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,86.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,67.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,102.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,104.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,51.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+12.0,0,1.0,0,0,0,2.0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+12.0,0,1.0,0,0,0,2.0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/src/test/scripts/functions/pipelines/intermediates/logical.csv b/src/test/scripts/functions/pipelines/intermediates/logical.csv
index d3be4ee..20d1e3c 100644
--- a/src/test/scripts/functions/pipelines/intermediates/logical.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/logical.csv
@@ -1 +1 @@
-MVI,OTLR,DUMMY,DIM
+MVI,DUMMY,DIM
diff --git a/src/test/scripts/functions/pipelines/intermediates/pipelines.csv b/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
index f798164..f20554a 100644
--- a/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
@@ -1,5 +1,2 @@
-imputeByMean,scale,dummycoding,m_pca
-imputeByMean,scale,dummycoding,m_pca
-imputeByMedian,scale,dummycoding,m_pca
-imputeByMedian,scale,dummycoding,m_pca
-imputeByMean,scale,dummycoding,m_pca
+imputeByMean,dummycoding
+imputeByMedian,dummycoding
diff --git a/src/test/scripts/functions/pipelines/testClassification.dml b/src/test/scripts/functions/pipelines/testClassification.dml
index f83bea3..62147e5 100644
--- a/src/test/scripts/functions/pipelines/testClassification.dml
+++ b/src/test/scripts/functions/pipelines/testClassification.dml
@@ -29,7 +29,7 @@ F = read($dirtyData, data_type="frame", format="csv", header=TRUE,
 metaInfo = read($metaData, data_type="frame", format="csv", header=FALSE);
 primitives = read($primitives, data_type = "frame", format="csv", header= TRUE)
 param = read($parameters, data_type = "frame", format="csv", header= TRUE)
-logical = read($logical, data_type = "frame", format="csv", header= FALSE)
+logical = frame(["MVI", "DUMMY"], rows=1, cols=2)
 sample = $sampleSize
 topK = $topk
 resources = $rv
@@ -80,7 +80,8 @@ getSchema = getSchema[, 1:ncol(getSchema) - 1] # strip the mask of class label
 
 # 5. find the best hyper parameters for classification algorithm
 # for now only find the best values for intercept and maximum outer iteration
-opt = utils::getOpByTarget(eX, eY, getMask, targetApplicaton)
+opt = matrix("0 0 100", rows=1, cols=3)
+
 
 # 6. get the cross validated accuracy on dirty dataset (only on training set)
 d_accuracy = 0
diff --git a/src/test/scripts/functions/pipelines/testLogical.dml b/src/test/scripts/functions/pipelines/testLogical.dml
index b1efb9d..c0a1306 100644
--- a/src/test/scripts/functions/pipelines/testLogical.dml
+++ b/src/test/scripts/functions/pipelines/testLogical.dml
@@ -68,7 +68,7 @@ getMask = getMask[, 1:ncol(getMask) - 1] # strip the mask of class label
 getFdMask = getFdMask[, 1:ncol(getFdMask) - 1] # strip the mask of class label
 getSchema = getSchema[, 1:ncol(getSchema) - 1] # strip the mask of class label
 # hyperparam for classifier
-opt = utils::getOpByTarget(eX, eY, getMask, targetApplicaton)
+opt = matrix("0 0 100", rows=1, cols=3)
 print("opt "+toString(opt))
 # get the cross validated accuracy on dirty dataset (only on training set)
 d_accuracy = 0