You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@madlib.apache.org by "Rashmi Raghu (JIRA)" <ji...@apache.org> on 2018/03/14 22:20:00 UTC

[jira] [Updated] (MADLIB-1215) PCA error with text grouping column

     [ https://issues.apache.org/jira/browse/MADLIB-1215?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Rashmi Raghu updated MADLIB-1215:
---------------------------------
    Description: 
PCA train ({{madlib.pca_train}}) fails when the grouping column is of type text (other non-integer data types have not been tested). The script below, a modified version of the grouping example from the docs, reproduces the error.



-- Reproduction script for MADLIB-1215: madlib.pca_train fails with
-- "operator does not exist: text = integer" when the grouping column is text.
-- Adapted from the grouping example in the MADlib PCA documentation, with the
-- grouping column declared as text instead of integer.

-- Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS mat_group_text;
CREATE TABLE mat_group_text (
 id integer,
 row_vec double precision[],
 matrix_id_text text -- grouping column; text type is what triggers the error
);

-- Array literals must start with '{'. The backslashes that preceded the braces
-- in the issue text were JIRA markup escapes, not part of the SQL.
-- Rows within a group share a vector length (3 for group '1', 5 for group '2').
INSERT INTO mat_group_text (id, row_vec, matrix_id_text) VALUES
(1, '{1,2,3}', '1'),
(2, '{2,1,2}', '1'),
(3, '{3,2,1}', '1'),
(4, '{1,2,3,4,5}', '2'),
(5, '{2,5,2,4,1}', '2'),
(6, '{5,4,3,2,1}', '2');

-- pca_train creates both the output table and a companion "<output>_mean" table,
-- so both are dropped up front.
DROP TABLE IF EXISTS result_table_group_text, result_table_group_text_mean;
SELECT madlib.pca_train('mat_group_text', -- Source table
 'result_table_group_text', -- Output table
 'id', -- Row id of source table
 0.8, -- Proportion of variance
 'matrix_id_text'); -- Grouping column

-- Only reached once the bug is fixed. The PCA output table exposes the component
-- rank as row_id (there is no row_id_text column), alongside the grouping column.
SELECT * FROM result_table_group_text ORDER BY matrix_id_text, row_id;
-- NOTICE: table "result_table_group_text" does not exist, skipping
-- NOTICE: table "result_table_group_text_mean" does not exist, skipping
-- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
-- LINE 5: WHERE matrix_id_text=1
-- ^
-- HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts.
-- QUERY: 
-- CREATE TABLE pg_temp.__madlib_temp_57228654_1520981521_47712361__group_0 AS
-- SELECT ROW_NUMBER() OVER() AS row_id, row_vec
-- FROM mat_group_text
-- WHERE matrix_id_text=1
-- 
-- CONTEXT: Traceback (most recent call last):
-- PL/Python function "pca_train", line 23, in <module>
-- return pca.pca(**globals())
-- PL/Python function "pca_train", line 87, in pca
-- PL/Python function "pca_train", line 235, in pca_wrap
-- PL/Python function "pca_train"
-- ********** Error **********

-- 
-- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
-- SQL state: 42883
-- Hint: No operator matches the given name and argument type(s). You might need to add explicit type casts.
-- Context: Traceback (most recent call last):
-- PL/Python function "pca_train", line 23, in <module>
-- return pca.pca(**globals())
-- PL/Python function "pca_train", line 87, in pca
-- PL/Python function "pca_train", line 235, in pca_wrap
-- PL/Python function "pca_train"

  was:
{{The issue is that PCA train does not run when the grouping column is text (have not tested other non-integer data types). See below for error reproduced on a modified example from the docs.}}
{{  }}
{{DROP TABLE IF EXISTS mat_group_text;}}
{{ CREATE TABLE mat_group_text (}}
{{     id integer,}}
{{     row_vec double precision[],}}
{{     matrix_id_text text}}
{{ );}}
{{ INSERT INTO mat_group_text VALUES}}
{{ (1, '\{1,2,3}', '1'),}}
{{ (2, '\{2,1,2}', '1'),}}
{{ (3, '\{3,2,1}', '1'),}}
{{ (4, '\{1,2,3,4,5}', '2'),}}
{{ (5, '\{2,5,2,4,1}', '2'),}}
{{ (6, '\{5,4,3,2,1}', '2');}}
{{  }}
{{ DROP TABLE IF EXISTS result_table_group_text, result_table_group_text_mean;}}
{{ SELECT madlib.pca_train('mat_group_text',             -- Source table}}
{{                         'result_table_group_text',    – Output table}}
{{                         'id',                    – Row id of source table}}
{{                          0.8,                    – Proportion of variance}}
{{                         'matrix_id_text');            – Grouping column}}
{{ SELECT * FROM result_table_group_text ORDER BY matrix_id_text, row_id_text;}}
{{ – NOTICE:  table "result_table_group_text" does not exist, skipping}}
{{ – NOTICE:  table "result_table_group_text_mean" does not exist, skipping}}
{{ – ERROR:  plpy.SPIError: plpy.SPIError: operator does not exist: text = integer}}
{{ – LINE 5:                      WHERE matrix_id_text=1}}
{{ --                                                  ^}}
{{ – HINT:  No operator matches the given name and argument type(s). You might need to add explicit type casts.}}
{{ – QUERY:  }}
{{ --                     CREATE TABLE pg_temp.__madlib_temp_57228654_1520981521_47712361__group_0 AS}}
{{ --                     SELECT  ROW_NUMBER() OVER() AS row_id, row_vec}}
{{ --                     FROM mat_group_text}}
{{ --                      WHERE matrix_id_text=1}}
{{ --                 }}
{{ – CONTEXT:  Traceback (most recent call last):}}
{{ --   PL/Python function "pca_train", line 23, in <module>}}
{{ --     return pca.pca(**globals())}}
{{ --   PL/Python function "pca_train", line 87, in pca}}
{{ --   PL/Python function "pca_train", line 235, in pca_wrap}}
{{ – PL/Python function "pca_train"}}
{{ – ********** Error **********}}
{{ !https://ssl.gstatic.com/ui/v1/icons/mail/images/cleardot.gif!}}
{{ -- }}
{{ – ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer}}
{{ – SQL state: 42883}}
{{ – Hint: No operator matches the given name and argument type(s). You might need to add explicit type casts.}}
{{ – Context: Traceback (most recent call last):}}
{{ --   PL/Python function "pca_train", line 23, in <module>}}
{{ --     return pca.pca(**globals())}}
{{ --   PL/Python function "pca_train", line 87, in pca}}
{{ --   PL/Python function "pca_train", line 235, in pca_wrap}}
{{ – PL/Python function "pca_train"}}


> PCA error with text grouping column
> -----------------------------------
>
>                 Key: MADLIB-1215
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1215
>             Project: Apache MADlib
>          Issue Type: Bug
>          Components: All Modules
>            Reporter: Rashmi Raghu
>            Priority: Major
>
> PCA train ({{madlib.pca_train}}) fails when the grouping column is of type text (other non-integer data types have not been tested). The script below, a modified version of the grouping example from the docs, reproduces the error.
> DROP TABLE IF EXISTS mat_group_text;
> CREATE TABLE mat_group_text (
>  id integer,
>  row_vec double precision[],
>  matrix_id_text text
> );
> INSERT INTO mat_group_text VALUES
> (1, '{1,2,3}', '1'),
> (2, '{2,1,2}', '1'),
> (3, '{3,2,1}', '1'),
> (4, '{1,2,3,4,5}', '2'),
> (5, '{2,5,2,4,1}', '2'),
> (6, '{5,4,3,2,1}', '2');
> DROP TABLE IF EXISTS result_table_group_text, result_table_group_text_mean;
> SELECT madlib.pca_train('mat_group_text', -- Source table
>  'result_table_group_text', -- Output table
>  'id', -- Row id of source table
>  0.8, -- Proportion of variance
>  'matrix_id_text'); -- Grouping column
> SELECT * FROM result_table_group_text ORDER BY matrix_id_text, row_id_text;
> -- NOTICE: table "result_table_group_text" does not exist, skipping
> -- NOTICE: table "result_table_group_text_mean" does not exist, skipping
> -- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
> -- LINE 5: WHERE matrix_id_text=1
> -- ^
> -- HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts.
> -- QUERY: 
> -- CREATE TABLE pg_temp.__madlib_temp_57228654_1520981521_47712361__group_0 AS
> -- SELECT ROW_NUMBER() OVER() AS row_id, row_vec
> -- FROM mat_group_text
> -- WHERE matrix_id_text=1
> -- 
> -- CONTEXT: Traceback (most recent call last):
> -- PL/Python function "pca_train", line 23, in <module>
> -- return pca.pca(**globals())
> -- PL/Python function "pca_train", line 87, in pca
> -- PL/Python function "pca_train", line 235, in pca_wrap
> -- PL/Python function "pca_train"
> -- ********** Error **********
> -- 
> -- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
> -- SQL state: 42883
> -- Hint: No operator matches the given name and argument type(s). You might need to add explicit type casts.
> -- Context: Traceback (most recent call last):
> -- PL/Python function "pca_train", line 23, in <module>
> -- return pca.pca(**globals())
> -- PL/Python function "pca_train", line 87, in pca
> -- PL/Python function "pca_train", line 235, in pca_wrap
> -- PL/Python function "pca_train"



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)