You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@madlib.apache.org by "Himanshu Pandey (JIRA)" <ji...@apache.org> on 2019/05/07 19:17:00 UTC

[jira] [Commented] (MADLIB-1322) MLP with minibatch fails for integer dependent variable on PostgreSQL

    [ https://issues.apache.org/jira/browse/MADLIB-1322?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16835035#comment-16835035 ] 

Himanshu Pandey commented on MADLIB-1322:
-----------------------------------------

[~fmcquillan], 

I have tested this on PostgreSQL 10.x and it seems to be working fine there. It also works fine on GPDB 5.x and 4.3.x. 

 

Thank you! 

> MLP with minibatch fails for integer dependent variable on PostgreSQL
> ---------------------------------------------------------------------
>
>                 Key: MADLIB-1322
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1322
>             Project: Apache MADlib
>          Issue Type: Bug
>          Components: Module: Neural Networks
>            Reporter: Frank McQuillan
>            Priority: Minor
>             Fix For: v1.16
>
>
> Fails on PostgreSQL.  As far as I know, it does not fail on Greenplum.
> {code}
> DROP TABLE IF EXISTS iris_data;
> CREATE TABLE iris_data(
>     id serial,
>     attributes numeric[],
>     class_text varchar,
>     class integer,
>     state varchar
> );
> INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES
> (1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),
> (2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),
> (4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),
> (6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),
> (7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),
> (8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),
> (10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),
> (11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),
> (12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),
> (13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),
> (14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),
> (15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),
> (16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),
> (17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),
> (18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),
> (19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),
> (20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),
> (21,ARRAY[6.1,2.9,4.7,1.4],'Iris_versicolor',2,'Alaska'),
> (22,ARRAY[5.6,2.9,3.6,1.3],'Iris_versicolor',2,'Alaska'),
> (23,ARRAY[6.7,3.1,4.4,1.4],'Iris_versicolor',2,'Alaska'),
> (24,ARRAY[5.6,3.0,4.5,1.5],'Iris_versicolor',2,'Alaska'),
> (25,ARRAY[5.8,2.7,4.1,1.0],'Iris_versicolor',2,'Alaska'),
> (26,ARRAY[6.2,2.2,4.5,1.5],'Iris_versicolor',2,'Alaska'),
> (27,ARRAY[5.6,2.5,3.9,1.1],'Iris_versicolor',2,'Alaska'),
> (28,ARRAY[5.0,3.4,1.5,0.2],'Iris_setosa',1,'Tennessee'),
> (29,ARRAY[4.4,2.9,1.4,0.2],'Iris_setosa',1,'Tennessee'),
> (30,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Tennessee'),
> (31,ARRAY[5.4,3.7,1.5,0.2],'Iris_setosa',1,'Tennessee'),
> (32,ARRAY[4.8,3.4,1.6,0.2],'Iris_setosa',1,'Tennessee'),
> (33,ARRAY[4.8,3.0,1.4,0.1],'Iris_setosa',1,'Tennessee'),
> (34,ARRAY[4.3,3.0,1.1,0.1],'Iris_setosa',1,'Tennessee'),
> (35,ARRAY[5.8,4.0,1.2,0.2],'Iris_setosa',1,'Tennessee'),
> (36,ARRAY[5.7,4.4,1.5,0.4],'Iris_setosa',1,'Tennessee'),
> (37,ARRAY[5.4,3.9,1.3,0.4],'Iris_setosa',1,'Tennessee'),
> (38,ARRAY[6.0,2.9,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
> (39,ARRAY[5.7,2.6,3.5,1.0],'Iris_versicolor',2,'Tennessee'),
> (40,ARRAY[5.5,2.4,3.8,1.1],'Iris_versicolor',2,'Tennessee'),
> (41,ARRAY[5.5,2.4,3.7,1.0],'Iris_versicolor',2,'Tennessee'),
> (42,ARRAY[5.8,2.7,3.9,1.2],'Iris_versicolor',2,'Tennessee'),
> (43,ARRAY[6.0,2.7,5.1,1.6],'Iris_versicolor',2,'Tennessee'),
> (44,ARRAY[5.4,3.0,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
> (45,ARRAY[6.0,3.4,4.5,1.6],'Iris_versicolor',2,'Tennessee'),
> (46,ARRAY[6.7,3.1,4.7,1.5],'Iris_versicolor',2,'Tennessee'),
> (47,ARRAY[6.3,2.3,4.4,1.3],'Iris_versicolor',2,'Tennessee'),
> (48,ARRAY[5.6,3.0,4.1,1.3],'Iris_versicolor',2,'Tennessee'),
> (49,ARRAY[5.5,2.5,4.0,1.3],'Iris_versicolor',2,'Tennessee'),
> (50,ARRAY[5.5,2.6,4.4,1.2],'Iris_versicolor',2,'Tennessee'),
> (51,ARRAY[6.1,3.0,4.6,1.4],'Iris_versicolor',2,'Tennessee'),
> (52,ARRAY[5.8,2.6,4.0,1.2],'Iris_versicolor',2,'Tennessee');
> {code}
> Works OK if dependent variable is TEXT:
> {code}
> DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
> SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table
>                                      'iris_data_packed',  -- Output table
>                                      'class_text',        -- Dependent variable TEXT
>                                      'attributes'        -- Independent variables
>                                     );
> \d+ iris_data_packed
>                                Table "public.iris_data_packed"
>        Column        |        Type        | Modifiers | Storage  | Stats target | Description 
> ---------------------+--------------------+-----------+----------+--------------+-------------
>  __id__              | bigint             |           | plain    |              | 
>  dependent_varname   | double precision[] |           | extended |              | 
>  independent_varname | double precision[] |           | extended |              | 
> DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
> -- Set seed so results are reproducible
> SELECT setseed(0);
> SELECT madlib.mlp_classification(
>     'iris_data_packed',      -- Output table from mini-batch preprocessor
>     'mlp_model',             -- Destination table
>     'independent_varname',   -- Hardcode to this, from table iris_data_packed
>     'dependent_varname',     -- Hardcode to this, from table iris_data_packed
>     ARRAY[5],                -- Number of units per layer
>     'learning_rate_init=0.1,
>     n_iterations=5,
>     tolerance=0',            -- Optimizer params
>     'tanh',                  -- Activation function
>     NULL,                    -- Default weight (1)
>     FALSE,                   -- No warm start
>     TRUE                     -- Verbose
> );
> INFO:  Iteration: 1, Loss: <0.990848103579>
> INFO:  Iteration: 2, Loss: <0.852423978558>
> INFO:  Iteration: 3, Loss: <0.689764103374>
> INFO:  Iteration: 4, Loss: <0.530458765792>
>  mlp_classification 
> --------------------
>  
> (1 row)
> {code}
> Does not work if dependent variable is INTEGER:
> {code}
> DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
> SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table
>                                      'iris_data_packed',  -- Output table
>                                      'class',        -- Dependent variable INTEGER
>                                      'attributes',        -- Independent variables
>                                      NULL, -- grouping
>                                      NULL, -- buffer size (or size of the mini-batch)
>                                      TRUE -- Encode scalar int dependent variable
>                                      );
> \d+ iris_data_packed
>                                Table "public.iris_data_packed"
>        Column        |        Type        | Modifiers | Storage  | Stats target | Description 
> ---------------------+--------------------+-----------+----------+--------------+-------------
>  __id__              | bigint             |           | plain    |              | 
>  dependent_varname   | double precision[] |           | extended |              | 
>  independent_varname | double precision[] |           | extended |              | 
> DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
> -- Set seed so results are reproducible
> SELECT setseed(0);
> SELECT madlib.mlp_classification(
>     'iris_data_packed',      -- Output table from mini-batch preprocessor
>     'mlp_model',             -- Destination table
>     'independent_varname',   -- Hardcode to this, from table iris_data_packed
>     'dependent_varname',     -- Hardcode to this, from table iris_data_packed
>     ARRAY[5],                -- Number of units per layer
>     'learning_rate_init=0.1,
>     n_iterations=10,
>     tolerance=0',            -- Optimizer params
>     'tanh',                  -- Activation function
>     NULL,                    -- Default weight (1)
>     FALSE,                   -- No warm start
>     TRUE                     -- Verbose
> );
> ERROR:  TypeError: must be string, not int
> CONTEXT:  Traceback (most recent call last):
>   PL/Python function "mlp_classification", line 33, in <module>
>     grouping_col)
>   PL/Python function "mlp_classification", line 42, in wrapper
>   PL/Python function "mlp_classification", line 147, in mlp
>   PL/Python function "mlp_classification", line 74, in quote_literal
> PL/Python function "mlp_classification"
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)