You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@madlib.apache.org by "Frank McQuillan (JIRA)" <ji...@apache.org> on 2019/05/07 19:31:00 UTC
[jira] [Resolved] (MADLIB-1322) MLP with minibatch fails for integer dependent variable on PostgreSQL
[ https://issues.apache.org/jira/browse/MADLIB-1322?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Frank McQuillan resolved MADLIB-1322.
-------------------------------------
Resolution: Won't Fix
Not worth fixing for PostgreSQL 9.x.
> MLP with minibatch fails for integer dependent variable on PostgreSQL
> ---------------------------------------------------------------------
>
> Key: MADLIB-1322
> URL: https://issues.apache.org/jira/browse/MADLIB-1322
> Project: Apache MADlib
> Issue Type: Bug
> Components: Module: Neural Networks
> Reporter: Frank McQuillan
> Priority: Minor
> Fix For: v1.16
>
>
> Fails on PostgreSQL 9.6. As far as I know, it does not fail on Greenplum.
> {code}
> DROP TABLE IF EXISTS iris_data;
> CREATE TABLE iris_data(
> id serial,
> attributes numeric[],
> class_text varchar,
> class integer,
> state varchar
> );
> INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES
> (1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),
> (2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),
> (4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),
> (6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),
> (7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),
> (8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),
> (9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),
> (10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),
> (11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),
> (12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),
> (13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),
> (14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),
> (15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),
> (16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),
> (17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),
> (18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),
> (19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),
> (20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),
> (21,ARRAY[6.1,2.9,4.7,1.4],'Iris_versicolor',2,'Alaska'),
> (22,ARRAY[5.6,2.9,3.6,1.3],'Iris_versicolor',2,'Alaska'),
> (23,ARRAY[6.7,3.1,4.4,1.4],'Iris_versicolor',2,'Alaska'),
> (24,ARRAY[5.6,3.0,4.5,1.5],'Iris_versicolor',2,'Alaska'),
> (25,ARRAY[5.8,2.7,4.1,1.0],'Iris_versicolor',2,'Alaska'),
> (26,ARRAY[6.2,2.2,4.5,1.5],'Iris_versicolor',2,'Alaska'),
> (27,ARRAY[5.6,2.5,3.9,1.1],'Iris_versicolor',2,'Alaska'),
> (28,ARRAY[5.0,3.4,1.5,0.2],'Iris_setosa',1,'Tennessee'),
> (29,ARRAY[4.4,2.9,1.4,0.2],'Iris_setosa',1,'Tennessee'),
> (30,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Tennessee'),
> (31,ARRAY[5.4,3.7,1.5,0.2],'Iris_setosa',1,'Tennessee'),
> (32,ARRAY[4.8,3.4,1.6,0.2],'Iris_setosa',1,'Tennessee'),
> (33,ARRAY[4.8,3.0,1.4,0.1],'Iris_setosa',1,'Tennessee'),
> (34,ARRAY[4.3,3.0,1.1,0.1],'Iris_setosa',1,'Tennessee'),
> (35,ARRAY[5.8,4.0,1.2,0.2],'Iris_setosa',1,'Tennessee'),
> (36,ARRAY[5.7,4.4,1.5,0.4],'Iris_setosa',1,'Tennessee'),
> (37,ARRAY[5.4,3.9,1.3,0.4],'Iris_setosa',1,'Tennessee'),
> (38,ARRAY[6.0,2.9,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
> (39,ARRAY[5.7,2.6,3.5,1.0],'Iris_versicolor',2,'Tennessee'),
> (40,ARRAY[5.5,2.4,3.8,1.1],'Iris_versicolor',2,'Tennessee'),
> (41,ARRAY[5.5,2.4,3.7,1.0],'Iris_versicolor',2,'Tennessee'),
> (42,ARRAY[5.8,2.7,3.9,1.2],'Iris_versicolor',2,'Tennessee'),
> (43,ARRAY[6.0,2.7,5.1,1.6],'Iris_versicolor',2,'Tennessee'),
> (44,ARRAY[5.4,3.0,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
> (45,ARRAY[6.0,3.4,4.5,1.6],'Iris_versicolor',2,'Tennessee'),
> (46,ARRAY[6.7,3.1,4.7,1.5],'Iris_versicolor',2,'Tennessee'),
> (47,ARRAY[6.3,2.3,4.4,1.3],'Iris_versicolor',2,'Tennessee'),
> (48,ARRAY[5.6,3.0,4.1,1.3],'Iris_versicolor',2,'Tennessee'),
> (49,ARRAY[5.5,2.5,4.0,1.3],'Iris_versicolor',2,'Tennessee'),
> (50,ARRAY[5.5,2.6,4.4,1.2],'Iris_versicolor',2,'Tennessee'),
> (51,ARRAY[6.1,3.0,4.6,1.4],'Iris_versicolor',2,'Tennessee'),
> (52,ARRAY[5.8,2.6,4.0,1.2],'Iris_versicolor',2,'Tennessee');
> {code}
> Works OK if dependent variable is TEXT:
> {code}
> DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
> SELECT madlib.minibatch_preprocessor('iris_data', -- Source table
> 'iris_data_packed', -- Output table
> 'class_text', -- Dependent variable TEXT
> 'attributes' -- Independent variables
> );
> \d+ iris_data_packed
> Table "public.iris_data_packed"
> Column | Type | Modifiers | Storage | Stats target | Description
> ---------------------+--------------------+-----------+----------+--------------+-------------
> __id__ | bigint | | plain | |
> dependent_varname | double precision[] | | extended | |
> independent_varname | double precision[] | | extended | |
> DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
> -- Set seed so results are reproducible
> SELECT setseed(0);
> SELECT madlib.mlp_classification(
> 'iris_data_packed', -- Output table from mini-batch preprocessor
> 'mlp_model', -- Destination table
> 'independent_varname', -- Hardcode to this, from table iris_data_packed
> 'dependent_varname', -- Hardcode to this, from table iris_data_packed
> ARRAY[5], -- Number of units per layer
> 'learning_rate_init=0.1,
> n_iterations=5,
> tolerance=0', -- Optimizer params
> 'tanh', -- Activation function
> NULL, -- Default weight (1)
> FALSE, -- No warm start
> TRUE -- Verbose
> );
> INFO: Iteration: 1, Loss: <0.990848103579>
> INFO: Iteration: 2, Loss: <0.852423978558>
> INFO: Iteration: 3, Loss: <0.689764103374>
> INFO: Iteration: 4, Loss: <0.530458765792>
> mlp_classification
> --------------------
>
> (1 row)
> {code}
> Does not work if dependent variable is INTEGER:
> {code}
> DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
> SELECT madlib.minibatch_preprocessor('iris_data', -- Source table
> 'iris_data_packed', -- Output table
> 'class', -- Dependent variable INTEGER
> 'attributes', -- Independent variables
> NULL, -- grouping
> NULL, -- buffer size (or size of the mini-batch)
> TRUE -- Encode scalar int dependent variable
> );
> \d+ iris_data_packed
> Table "public.iris_data_packed"
> Column | Type | Modifiers | Storage | Stats target | Description
> ---------------------+--------------------+-----------+----------+--------------+-------------
> __id__ | bigint | | plain | |
> dependent_varname | double precision[] | | extended | |
> independent_varname | double precision[] | | extended | |
> DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
> -- Set seed so results are reproducible
> SELECT setseed(0);
> SELECT madlib.mlp_classification(
> 'iris_data_packed', -- Output table from mini-batch preprocessor
> 'mlp_model', -- Destination table
> 'independent_varname', -- Hardcode to this, from table iris_data_packed
> 'dependent_varname', -- Hardcode to this, from table iris_data_packed
> ARRAY[5], -- Number of units per layer
> 'learning_rate_init=0.1,
> n_iterations=10,
> tolerance=0', -- Optimizer params
> 'tanh', -- Activation function
> NULL, -- Default weight (1)
> FALSE, -- No warm start
> TRUE -- Verbose
> );
> ERROR: TypeError: must be string, not int
> CONTEXT: Traceback (most recent call last):
> PL/Python function "mlp_classification", line 33, in <module>
> grouping_col)
> PL/Python function "mlp_classification", line 42, in wrapper
> PL/Python function "mlp_classification", line 147, in mlp
> PL/Python function "mlp_classification", line 74, in quote_literal
> PL/Python function "mlp_classification"
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)