You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2022/02/06 18:19:38 UTC
[systemds] branch main updated: [MINOR] Adding dataArgs parameter to gridsearch to get names of data variables (e.g., X, Y, x, y)
This is an automated email from the ASF dual-hosted git repository.
ssiddiqi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 2464b00 [MINOR] Adding dataArgs parameter to gridsearch to get names of data variables (e.g., X, Y, x, y)
2464b00 is described below
commit 2464b000e0896df3459de82b49aeb983477df71b
Author: Shafaq Siddiqi <sh...@tugraz.at>
AuthorDate: Sun Feb 6 17:35:14 2022 +0100
[MINOR] Adding dataArgs parameter to gridsearch to get names of data variables (e.g., X, Y, x, y)
---
scripts/builtin/gridSearch.dml | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/scripts/builtin/gridSearch.dml b/scripts/builtin/gridSearch.dml
index a8c0986..eb1f16e 100644
--- a/scripts/builtin/gridSearch.dml
+++ b/scripts/builtin/gridSearch.dml
@@ -33,6 +33,7 @@
# numB Integer --- Maximum number of parameters in model B (pass the max because the size
# may vary with parameters like icpt or multi-class classification)
# params List[String] --- List of varied hyper-parameter names
+# dataArgs List[String] --- List of data parameter names in 'trainArgs', in the order features, labels (e.g., list("X", "y"))
# paramValues List[Unknown] --- List of matrices providing the parameter values as
# columnvectors for position-aligned hyper-parameters in 'params'
# trainArgs List[Unknown] --- named List of arguments to pass to the 'train' function, where
@@ -58,20 +59,21 @@
m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict,
Integer numB=ncol(X), List[String] params, List[Unknown] paramValues,
- List[Unknown] trainArgs = list(), List[Unknown] predictArgs = list(),
+ List[Unknown] trainArgs = list(), List[Unknown] dataArgs = list(), List[Unknown] predictArgs = list(),
Boolean cv = FALSE, Integer cvk = 5, Boolean verbose = TRUE)
return (Matrix[Double] B, Frame[Unknown] opt)
{
# Step 0) handling default arguments, which require access to passed data
if( length(trainArgs) == 0 )
trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
+ if( length(dataArgs) == 0 )
+ dataArgs = list("X", "y");
if( length(predictArgs) == 0 )
predictArgs = list(X, y);
if( cv & cvk <= 1 ) {
print("gridSearch: called with cv=TRUE but cvk="+cvk+", set to default cvk=5.")
cvk = 5;
}
-
# Step 1) preparation of parameters, lengths, and values in convenient form
numParams = length(params);
paramLens = matrix(0, numParams, 1);
@@ -106,6 +108,8 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
# with cross-validation
if( cv ) {
+ yidx = as.scalar(dataArgs[2])
+ xidx = as.scalar(dataArgs[1])
# a) create folds
foldsX = list(); foldsY = list();
fs = ceil(nrow(X)/cvk);
@@ -125,8 +129,8 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
for( k in 1:cvk ) {
[tmpX, testX] = remove(foldsX, k);
[tmpy, testy] = remove(foldsY, k);
- ltrainArgs['X'] = rbind(tmpX);
- ltrainArgs['y'] = rbind(tmpy);
+ ltrainArgs[xidx] = rbind(tmpX);
+ ltrainArgs[yidx] = rbind(tmpy);
lbeta = t(eval(train, ltrainArgs));
cvbeta[,1:length(lbeta)] = cvbeta[,1:length(lbeta)] + matrix(lbeta, 1, length(lbeta));
lpredictArgs[1] = as.matrix(testX);