You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2021/07/05 16:12:37 UTC
[systemds] branch master updated: [MINOR] cleanups in cleaning tests
This is an automated email from the ASF dual-hosted git repository.
ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 56eb216 [MINOR] cleanups in cleaning tests
56eb216 is described below
commit 56eb216420194533bc205b0daa913dc12320f0f6
Author: Shafaq Siddiqi <sh...@tugraz.at>
AuthorDate: Mon Jul 5 18:12:10 2021 +0200
[MINOR] cleanups in cleaning tests
---
scripts/builtin/bandit.dml | 4 ++--
.../sysds/test/functions/pipelines/CleaningTestClassification.java | 3 +--
src/test/scripts/functions/pipelines/intermediates/hyperparams.csv | 7 ++-----
src/test/scripts/functions/pipelines/intermediates/logical.csv | 2 +-
src/test/scripts/functions/pipelines/intermediates/pipelines.csv | 7 ++-----
src/test/scripts/functions/pipelines/testClassification.dml | 5 +++--
src/test/scripts/functions/pipelines/testLogical.dml | 2 +-
7 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 5c24dab..8770cdc 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -95,7 +95,7 @@ m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, List[Unknown
# sort the configurations for successive halving
avergae_perf = getMaxPerConf(outPip, nrow(configurations))
- configurations = frameSort(cbind(avergae_perf, configurations))
+ configurations = frameSort(cbind(avergae_perf, configurations), TRUE)
configurations = configurations[, 2:ncol(configurations)]
}
bracket_pipel = removeEmpty(target=bracket_pipel, margin="rows")
@@ -405,7 +405,7 @@ extractTopK = function(Frame[Unknown] pipeline, Matrix[Double] hyperparam,
# sort results
hyperparam = order(target = hyperparam, by = 1, decreasing=TRUE, index.return=FALSE)
- pipeline = frameSort(pipeline)
+ pipeline = frameSort(pipeline, TRUE)
# remove the row with accuracy less than test accuracy
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java b/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
index 8144d89..9043fdf 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/CleaningTestClassification.java
@@ -40,7 +40,6 @@ public class CleaningTestClassification extends AutomatedTestBase {
private final static String CLEAN = DATA_DIR+ "clean.csv";
private final static String META = RESOURCE+ "meta/meta_census.csv";
private final static String OUTPUT = RESOURCE+"intermediates/";
- private final static String LOGICAL = RESOURCE+"intermediates/logical.csv";
private static final String PARAM_DIR = "./scripts/pipelines/properties/";
private final static String PARAM = PARAM_DIR + "param.csv";
@@ -79,7 +78,7 @@ public class CleaningTestClassification extends AutomatedTestBase {
loadTestConfiguration(getTestConfiguration(TEST_NAME1));
fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
programArgs = new String[] {"-stats", "-exec", "singlenode", "-nvargs", "dirtyData="+DIRTY,
- "metaData="+META, "primitives="+PRIMITIVES, "parameters="+PARAM, "logical="+LOGICAL,
+ "metaData="+META, "primitives="+PRIMITIVES, "parameters="+PARAM,
"sampleSize="+ sample, "topk="+ topk, "rv="+ resources, "cv="+ crossfold,
"weighted="+ weightedAccuracy, "output="+OUTPUT, "target="+target, "cleanData="+CLEAN,
"O="+output("O")};
diff --git a/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv b/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
index 2e19fb1..a4729a4 100644
--- a/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/hyperparams.csv
@@ -1,5 +1,2 @@
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,86.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,67.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,102.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,104.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,0,0,0,0,1.0,0,0,0,2.0,2.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0,2.0,3.0,51.0,1.0,0,0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+12.0,0,1.0,0,0,0,2.0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+12.0,0,1.0,0,0,0,2.0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/src/test/scripts/functions/pipelines/intermediates/logical.csv b/src/test/scripts/functions/pipelines/intermediates/logical.csv
index d3be4ee..20d1e3c 100644
--- a/src/test/scripts/functions/pipelines/intermediates/logical.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/logical.csv
@@ -1 +1 @@
-MVI,OTLR,DUMMY,DIM
+MVI,DUMMY,DIM
diff --git a/src/test/scripts/functions/pipelines/intermediates/pipelines.csv b/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
index f798164..f20554a 100644
--- a/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/pipelines.csv
@@ -1,5 +1,2 @@
-imputeByMean,scale,dummycoding,m_pca
-imputeByMean,scale,dummycoding,m_pca
-imputeByMedian,scale,dummycoding,m_pca
-imputeByMedian,scale,dummycoding,m_pca
-imputeByMean,scale,dummycoding,m_pca
+imputeByMean,dummycoding
+imputeByMedian,dummycoding
diff --git a/src/test/scripts/functions/pipelines/testClassification.dml b/src/test/scripts/functions/pipelines/testClassification.dml
index f83bea3..62147e5 100644
--- a/src/test/scripts/functions/pipelines/testClassification.dml
+++ b/src/test/scripts/functions/pipelines/testClassification.dml
@@ -29,7 +29,7 @@ F = read($dirtyData, data_type="frame", format="csv", header=TRUE,
metaInfo = read($metaData, data_type="frame", format="csv", header=FALSE);
primitives = read($primitives, data_type = "frame", format="csv", header= TRUE)
param = read($parameters, data_type = "frame", format="csv", header= TRUE)
-logical = read($logical, data_type = "frame", format="csv", header= FALSE)
+logical = frame(["MVI", "DUMMY"], rows=1, cols=2)
sample = $sampleSize
topK = $topk
resources = $rv
@@ -80,7 +80,8 @@ getSchema = getSchema[, 1:ncol(getSchema) - 1] # strip the mask of class label
# 5. find the best hyper parameters for classification algorithm
# for now only find the best values for intercept and maximum outer iteration
-opt = utils::getOpByTarget(eX, eY, getMask, targetApplicaton)
+opt = matrix("0 0 100", rows=1, cols=3)
+
# 6. get the cross validated accuracy on dirty dataset (only on training set)
d_accuracy = 0
diff --git a/src/test/scripts/functions/pipelines/testLogical.dml b/src/test/scripts/functions/pipelines/testLogical.dml
index b1efb9d..c0a1306 100644
--- a/src/test/scripts/functions/pipelines/testLogical.dml
+++ b/src/test/scripts/functions/pipelines/testLogical.dml
@@ -68,7 +68,7 @@ getMask = getMask[, 1:ncol(getMask) - 1] # strip the mask of class label
getFdMask = getFdMask[, 1:ncol(getFdMask) - 1] # strip the mask of class label
getSchema = getSchema[, 1:ncol(getSchema) - 1] # strip the mask of class label
# hyperparam for classifier
-opt = utils::getOpByTarget(eX, eY, getMask, targetApplicaton)
+opt = matrix("0 0 100", rows=1, cols=3)
print("opt "+toString(opt))
# get the cross validated accuracy on dirty dataset (only on training set)
d_accuracy = 0