You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ss...@apache.org on 2022/02/06 18:19:38 UTC

[systemds] branch main updated: [MINOR] Adding dataArgs parameter to gridsearch to get names of data variables, e.g., X, Y, x, y

This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 2464b00  [MINOR] Adding dataArgs parameter to gridsearch to get names of data variables, e.g., X, Y, x, y
2464b00 is described below

commit 2464b000e0896df3459de82b49aeb983477df71b
Author: Shafaq Siddiqi <sh...@tugraz.at>
AuthorDate: Sun Feb 6 17:35:14 2022 +0100

    [MINOR] Adding dataArgs parameter to gridsearch to get names of data variables, e.g., X, Y, x, y
---
 scripts/builtin/gridSearch.dml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/builtin/gridSearch.dml b/scripts/builtin/gridSearch.dml
index a8c0986..eb1f16e 100644
--- a/scripts/builtin/gridSearch.dml
+++ b/scripts/builtin/gridSearch.dml
@@ -33,6 +33,7 @@
 # numB         Integer            ---        Maximum number of parameters in model B (pass the max because the size
 #                                            may vary with parameters like icpt or multi-class classification)
 # params       List[String]       ---        List of varied hyper-parameter names
+# dataArgs     List[String]       ---        List of data parameter names (to identify the data arguments in 'trainArgs' by name, e.g., list("X", "y"))
 # paramValues  List[Unknown]      ---        List of matrices providing the parameter values as
 #                                            columnvectors for position-aligned hyper-parameters in 'params'
 # trainArgs    List[Unknown]      ---        named List of arguments to pass to the 'train' function, where
@@ -58,20 +59,21 @@
 
 m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String predict,
     Integer numB=ncol(X), List[String] params, List[Unknown] paramValues,
-    List[Unknown] trainArgs = list(), List[Unknown] predictArgs = list(),
+    List[Unknown] trainArgs = list(), List[Unknown] dataArgs = list(), List[Unknown] predictArgs = list(),
     Boolean cv = FALSE, Integer cvk = 5, Boolean verbose = TRUE)
   return (Matrix[Double] B, Frame[Unknown] opt)
 {
   # Step 0) handling default arguments, which require access to passed data
   if( length(trainArgs) == 0 )
     trainArgs = list(X=X, y=y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
+  if( length(dataArgs) == 0 )
+    dataArgs = list("X", "y");  
   if( length(predictArgs) == 0 )
     predictArgs = list(X, y);
   if( cv & cvk <= 1 ) {
     print("gridSearch: called with cv=TRUE but cvk="+cvk+", set to default cvk=5.")
     cvk = 5;
   }
-
   # Step 1) preparation of parameters, lengths, and values in convenient form
   numParams = length(params);
   paramLens = matrix(0, numParams, 1);
@@ -106,6 +108,8 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
 
   # with cross-validation
   if( cv ) {
+    yidx = as.scalar(dataArgs[2])
+    xidx = as.scalar(dataArgs[1])
     # a) create folds
     foldsX = list(); foldsY = list();
     fs = ceil(nrow(X)/cvk);
@@ -125,8 +129,8 @@ m_gridSearch = function(Matrix[Double] X, Matrix[Double] y, String train, String
       for( k in 1:cvk ) {
         [tmpX, testX] = remove(foldsX, k);
         [tmpy, testy] = remove(foldsY, k);
-        ltrainArgs['X'] = rbind(tmpX);
-        ltrainArgs['y'] = rbind(tmpy);
+        ltrainArgs[xidx] = rbind(tmpX);
+        ltrainArgs[yidx] = rbind(tmpy);
         lbeta = t(eval(train, ltrainArgs));
         cvbeta[,1:length(lbeta)] = cvbeta[,1:length(lbeta)] + matrix(lbeta, 1, length(lbeta));
         lpredictArgs[1] = as.matrix(testX);