You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/05/29 19:48:17 UTC
[systemds] branch master updated: [SYSTEMDS-2983] Additional
cleanups gridSearch (docs, args, defaults)
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new d27b185 [SYSTEMDS-2983] Additional cleanups gridSearch (docs, args, defaults)
d27b185 is described below
commit d27b185f7ee6d210943eec618375894c6ffa20dd
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat May 29 21:45:26 2021 +0200
[SYSTEMDS-2983] Additional cleanups gridSearch (docs, args, defaults)
This patch makes final cleanups in the gridSearch builtin function, now
having correct handling for missing trainArgs, adding the handling of
predictArgs, and adding the missing parameter documentation.
---
scripts/builtin/gridSearch.dml | 53 +++++++++++++++++-----
.../scripts/functions/builtin/GridSearchLM.dml | 5 +-
.../functions/builtin/GridSearchMLogreg.dml | 3 +-
.../scripts/functions/builtin/HyperbandLM3.dml | 2 +-
4 files changed, 46 insertions(+), 17 deletions(-)
diff --git a/scripts/builtin/gridSearch.dml b/scripts/builtin/gridSearch.dml
index df4d5be..5a5bdba 100644
--- a/scripts/builtin/gridSearch.dml
+++ b/scripts/builtin/gridSearch.dml
@@ -19,13 +19,42 @@
#
#-------------------------------------------------------------
+
+#-------------------------------------------------------------------------------
+# X Input feature matrix
+# y Input label vector (or matrix)
+# train Name ft of the train function to call via ft(trainArgs)
+# predict Name fp of the loss function to call via fp(predictArgs, B)
+# numB Maximum number of parameters in model B (pass the maximum
+# because the size of B may vary with parameters like icpt)
+# params List of varied hyper-parameter names
+# paramValues List of matrices providing the parameter values as
+# column vectors for position-aligned hyper-parameters in 'params'
+# trainArgs Named list of arguments to pass to the 'train' function, where
+# gridSearch replaces enumerated hyper-parameters by name; if
+# not provided or an empty list, the lm parameters are used
+# predictArgs List of arguments to pass to the 'predict' function, where
+# gridSearch appends the trained model at the end; if
+# not provided or an empty list, list(X, y) is used instead
+# verbose flag for verbose debug output
+#-------------------------------------------------------------------------------
+# B the trained model with minimal loss (by the 'predict' function)
+# opt one-row frame w/ optimal hyperparameters (by 'params' position)
+#-------------------------------------------------------------------------------
+
m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict,
- Integer ncolB=ncol(X), List[String] params, List[Unknown] paramValues, List[Unknown]
- trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE),
+ Integer numB=ncol(X), List[String] params, List[Unknown] paramValues,
+ List[Unknown] trainArgs = list(), List[Unknown] predictArgs = list(),
Boolean verbose = TRUE)
return (Matrix[Double] B, Frame[Unknown] opt)
{
- # Step 0) preparation of parameters, lengths, and values in convenient form
+ # Step 0) handling default arguments, which require access to passed data
+ if( length(trainArgs) == 0 )
+ trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
+ if( length(predictArgs) == 0 )
+ predictArgs = list(X, y);
+
+ # Step 1) preparation of parameters, lengths, and values in convenient form
numParams = length(params);
paramLens = matrix(0, numParams, 1);
for( j in 1:numParams ) {
@@ -39,8 +68,8 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
}
cumLens = rev(cumprod(rev(paramLens))/rev(paramLens));
numConfigs = prod(paramLens);
-
- # Step 1) materialize hyper-parameter combinations
+
+ # Step 2) materialize hyper-parameter combinations
# (simplify debugging and compared to compute negligible)
HP = matrix(0, numConfigs, numParams);
parfor( i in 1:nrow(HP) ) {
@@ -53,23 +82,23 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
print("GridSeach: Hyper-parameter combinations: \n"+toString(HP));
}
- # Step 2) training/scoring of parameter combinations
+ # Step 3) training/scoring of parameter combinations
# TODO integrate cross validation
- Rbeta = matrix(0, nrow(HP), ncolB);
+ Rbeta = matrix(0, nrow(HP), numB);
Rloss = matrix(0, nrow(HP), 1);
parfor( i in 1:nrow(HP) ) {
# a) replace training arguments
- largs = trainArgs;
+ ltrainArgs = trainArgs;
for( j in 1:numParams )
- largs[as.scalar(params[j])] = as.scalar(HP[i,j]);
+ ltrainArgs[as.scalar(params[j])] = as.scalar(HP[i,j]);
# b) core training/scoring and write-back
- lbeta = t(eval(train, largs))
+ lbeta = t(eval(train, ltrainArgs))
Rbeta[i,1:ncol(lbeta)] = lbeta;
- Rloss[i,] = eval(predict, list(X, y, t(Rbeta[i,])));
+ Rloss[i,] = eval(predict, append(predictArgs,t(lbeta)));
}
- # Step 3) select best parameter combination
+ # Step 4) select best parameter combination
ix = as.scalar(rowIndexMin(t(Rloss)));
B = t(Rbeta[ix,]); # optimal model
opt = as.frame(HP[ix,]); # optimal hyper-parameters
diff --git a/src/test/scripts/functions/builtin/GridSearchLM.dml b/src/test/scripts/functions/builtin/GridSearchLM.dml
index f6ec084..4311eba 100644
--- a/src/test/scripts/functions/builtin/GridSearchLM.dml
+++ b/src/test/scripts/functions/builtin/GridSearchLM.dml
@@ -34,9 +34,8 @@ ytest = y[(N+1):nrow(X),];
params = list("reg", "tol", "maxi");
paramRanges = list(10^seq(0,-4), 10^seq(-6,-12), 10^seq(1,3));
-trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
-[B1, opt] = gridSearch(X=Xtrain, y=ytrain, train="lm", predict="l2norm",
- ncolB=ncol(X), params=params, paramValues=paramRanges, trainArgs=trainArgs);
+[B1, opt] = gridSearch(X=Xtrain, y=ytrain, train="lm", predict="l2norm",
+ numB=ncol(X), params=params, paramValues=paramRanges);
B2 = lm(X=Xtrain, y=ytrain, verbose=FALSE);
l1 = l2norm(Xtest, ytest, B1);
diff --git a/src/test/scripts/functions/builtin/GridSearchMLogreg.dml b/src/test/scripts/functions/builtin/GridSearchMLogreg.dml
index ac96fff..33bd9f5 100644
--- a/src/test/scripts/functions/builtin/GridSearchMLogreg.dml
+++ b/src/test/scripts/functions/builtin/GridSearchMLogreg.dml
@@ -36,7 +36,8 @@ ytest = y[(N+1):nrow(X),];
params = list("icpt", "reg", "maxii");
paramRanges = list(seq(0,2),10^seq(1,-6), 10^seq(1,3));
trainArgs = list(X=Xtrain, Y=ytrain, icpt=-1, reg=-1, tol=1e-9, maxi=100, maxii=-1, verbose=FALSE);
-[B1,opt] = gridSearch(Xtrain, ytrain, "multiLogReg", "accuracy", ncol(X)+1, params, paramRanges, trainArgs, TRUE);
+[B1,opt] = gridSearch(X=Xtrain, y=ytrain, train="multiLogReg", predict="accuracy", numB=ncol(X)+1,
+ params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=TRUE);
B2 = multiLogReg(X=Xtrain, Y=ytrain, verbose=TRUE);
l1 = accuracy(Xtest, ytest, B1);
diff --git a/src/test/scripts/functions/builtin/HyperbandLM3.dml b/src/test/scripts/functions/builtin/HyperbandLM3.dml
index e2b23fa..b35d36d 100644
--- a/src/test/scripts/functions/builtin/HyperbandLM3.dml
+++ b/src/test/scripts/functions/builtin/HyperbandLM3.dml
@@ -43,7 +43,7 @@ paramRanges = matrix("0 20", rows=1, cols=2);
paramRanges2 = list(10^seq(0,-4))
trainArgs = list(X=X_train, y=y_train, icpt=0, reg=-1, tol=1e-9, maxi=0, verbose=FALSE);
-[bestWeights, optHyperParams2] = gridSearch(X=X_train, y=y_train, ncolB=ncol(X),
+[bestWeights, optHyperParams2] = gridSearch(X=X_train, y=y_train, numB=ncol(X),
train="lm", predict="l2norm", trainArgs=trainArgs, params=params, paramValues=paramRanges2);
print(toString(optHyperParams))