You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@madlib.apache.org by LUYAO CHEN <lu...@hotmail.com> on 2018/07/26 18:47:38 UTC

A problem of madlib.cross_validation_general

Dear user community,


I found a problem that could be a bug. Could you look into it?


It happens when the explored values are of type varchar. In the case below, I want to compare the performance of different optimizers, whose names are character strings.


-- Sample data for the reproduction: 20 patient rows used as input to the
-- cross-validation call.
DROP TABLE IF EXISTS patients;
CREATE TABLE patients (
    id            INTEGER NOT NULL,
    -- Declared BOOLEAN here; the original author noted INTEGER as an alternative.
    second_attack BOOLEAN,
    treatment     INTEGER,
    trait_anxiety INTEGER
);

-- Explicit column list so the rows do not silently depend on column order.
INSERT INTO patients (id, second_attack, treatment, trait_anxiety) VALUES
(1,  TRUE,  1, 70),
(2,  TRUE,  1, 80),
(3,  TRUE,  1, 50),
(4,  TRUE,  0, 60),
(5,  TRUE,  0, 40),
(6,  TRUE,  0, 65),
(7,  TRUE,  0, 75),
(8,  TRUE,  0, 80),
(9,  TRUE,  0, 70),
(10, TRUE,  0, 60),
(11, FALSE, 1, 65),
(12, FALSE, 1, 50),
(13, FALSE, 1, 45),
(14, FALSE, 1, 35),
(15, FALSE, 1, 40),
(16, FALSE, 1, 50),
(17, FALSE, 0, 55),
(18, FALSE, 0, 45),
(19, FALSE, 0, 50),
(20, FALSE, 0, 60);


-- Reproduction: run madlib.cross_validation_general with 'Optimizer' as the
-- explored parameter and three character-valued candidates (irls, cg, igd).
-- The validation result table is named cv_result, so drop any earlier copy.
DROP TABLE IF EXISTS cv_result CASCADE;

SELECT madlib.cross_validation_general(
    -- modelling_func
    'madlib.logregr_train',
    -- modelling_params
    '{%data%, %model%, second_attack, "ARRAY[1, treatment, trait_anxiety]", null, 10,Optimizer,0.001,TRUE  }'::varchar[],
    -- modelling_params_type
    '{varchar, varchar, varchar, varchar, varchar,integer,varchar,double precision,boolean}'::varchar[],
    -- param_explored
    'Optimizer',
    -- explore_values
    '{irls,cg,igd}'::varchar[],
    -- predict_func
    'madlib.cv_logregr_predict',
    -- predict_params
    '{%model%, %data%,"ARRAY[1, treatment, trait_anxiety]",%id%, %prediction%}'::varchar[],
    -- predict_params_type
    '{text, text, text,text, text}'::varchar[],
    -- metric_func
    'madlib.cv_logregr_accuracy',
    -- metric_params
    '{%prediction%, %data%, %id%, second_attack, %error%}'::varchar[],
    -- metric_params_type
    '{varchar, varchar, varchar,varchar, varchar}'::varchar[],
    -- data_tbl
    'patients',
    -- data_id
    'id',
    -- id_is_random
    TRUE,
    -- validation_result
    'cv_result',
    -- data_cols
    '{id,second_attack, treatment, trait_anxiety}'::varchar[],
    -- fold_num
    10
);

ERROR:  spiexceptions.UndefinedColumn: column "irls" does not exist
LINE 5:                     (irls)::varchar as Optimizer,
                             ^
QUERY:
            DROP TABLE IF EXISTS __madlib_temp_accum_error84716681_1532630557_66836697__;
            CREATE TEMP TABLE __madlib_temp_accum_error84716681_1532630557_66836697__ as
                SELECT
                    (irls)::varchar as Optimizer,
                    pg_temp.__madlib_temp_output_error1973940_1532630557_11061763__.*
                FROM pg_temp.__madlib_temp_output_error1973940_1532630557_11061763__


CONTEXT:  Traceback (most recent call last):
  PL/Python function "cross_validation_general", line 23, in <module>
    return cross_validation.cross_validation_general(**globals())
  PL/Python function "cross_validation_general", line 366, in cross_validation_general
  PL/Python function "cross_validation_general", line 283, in _one_step_cv
PL/Python function "cross_validation_general"