You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2016/01/20 22:23:43 UTC

[2/2] incubator-madlib git commit: Elastic Net: Check only if features are numeric

Elastic Net: Check only if features are numeric

JIRA: MADLIB-952

Columns were being checked to ensure that every column is of the same
numeric type. While raising an error for non-numeric types is correct,
there is no need to require that they all share the same numeric type, as
the columns are cast to float8[] inside the function. The input analyzer
has been changed to relax this condition.

This closes #12.


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/d282e0f3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/d282e0f3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/d282e0f3

Branch: refs/heads/master
Commit: d282e0f362c522e74f310b0148d392db6d63ac7c
Parents: 83391ac
Author: Orhan Kislal <ok...@pivotal.io>
Authored: Wed Jan 20 13:13:08 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Wed Jan 20 13:22:54 2016 -0800

----------------------------------------------------------------------
 .../modules/elastic_net/elastic_net.py_in       |  6 ++---
 .../elastic_net_optimizer_fista.py_in           |  2 +-
 .../test/elastic_net_install_check.sql_in       | 27 ++++++++++++++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/elastic_net.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net.py_in b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
index 21c581a..22266f9 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
@@ -302,7 +302,6 @@ def elastic_net_train(schema_madlib, tbl_source, tbl_result, col_dep_var,
                    SELECT {schema_madlib}.elastic_net_train('gaussian');
                    for supported optimizers.
                    """.format(schema_madlib=schema_madlib))
-
     # handle all special cases of col_ind_var
     col_ind_var, outstr_array = analyze_input_str(schema_madlib, tbl_source,
                                                   col_ind_var, col_dep_var,
@@ -420,13 +419,12 @@ def analyze_input_str(schema_madlib, tbl_source,
                                             col_ind_var)
         else:
             included_col_types = [col_types_dict[i] for i in outstr_array]
-            if not all(i == included_col_types[0] and is_psql_numeric_type(i)
+            if not all(is_psql_numeric_type(i)
                        for i in included_col_types):
                 plpy.error("""
                            Elastic Net error: All columns to be included in the
-                           independent variables should be of the same numeric type.
+                           independent variables should be of the numeric type.
                            """)
-
         col_ind_var_new = "ARRAY[" + ','.join(outstr_array) + "]"
         return (col_ind_var_new, outstr_array)
 

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in b/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
index a546de4..e3e5977 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
@@ -422,7 +422,7 @@ def __compute_fista(schema_madlib, func_step_aggregate, func_state_diff,
         arguments
     @param tbl_state Name of the (temporary) table containing the inter-iteration
         states
-    @param rel_source Name of the relation containing input points
+    @param tbl_source Name of the relation containing input points
     @param col_ind_var Name of the independent variables column
     @param col_dep_var Name of the dependent variable column
     @param drop_table Boolean, whether to use IterationController (True) or

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
index c45ccd2..643750a 100644
--- a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
+++ b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
@@ -514,6 +514,16 @@ COPY lin_housing_wi (x, y) FROM STDIN NULL '?';
 {1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88}	11.90
 \.
 
+DROP TABLE IF EXISTS elastic_type_src;
+
+CREATE TABLE elastic_type_src ( var_int int, var_float8 float8, var_sint smallint);
+COPY elastic_type_src (var_int, var_float8, var_sint) FROM stdin DELIMITER ',' NULL '?' ;
+1, 1.1, 1
+2, 2.2, 2
+3, 3.3, 3
+4, 4.4, 4
+\.
+
 create function check_elastic_net ()
 returns void as $$
 begin
@@ -614,6 +624,23 @@ begin
         'Elastic Net: log-likelihood mismatch (use_active_set = t)!'
     ) from house_en;
 
+    execute 'DROP TABLE IF EXISTS elastic_type_res';
+    perform elastic_net_train('elastic_type_src',
+		'elastic_type_res',
+		'var_int < 0',
+		'*',
+		'binomial',
+		0.6,
+		0.02,
+		TRUE,
+		NULL,
+		'fista',
+		'',
+		'',
+		10000,
+		1e-6
+	);
+
 end;
 $$ language plpgsql volatile;