You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2016/03/26 00:49:01 UTC
[2/3] incubator-madlib git commit: Elastic Net Predict: Skip arrays
with NULL values
Elastic Net Predict: Skip arrays with NULL values
Jira: MADLIB-919
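Having NULL values in an input array led to an error while converting
the array to a MappedColumnVector. With this fix, the predict functions
return NULL for feature arrays that contain NULL values, and raise a
descriptive error if the coefficient array contains NULL values.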
Closes #31
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/7d4eec9d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/7d4eec9d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/7d4eec9d
Branch: refs/heads/master
Commit: 7d4eec9d443c7aef8a63565d520dee0ba773d929
Parents: 62a99ce
Author: Nandish Jayaram <nj...@pivotal.io>
Authored: Fri Mar 11 14:58:48 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Fri Mar 25 16:48:32 2016 -0700
----------------------------------------------------------------------
src/modules/elastic_net/elastic_net_utils.cpp | 56 ++++++++++
.../test/elastic_net_install_check.sql_in | 102 ++++++++++++++++---
2 files changed, 145 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/7d4eec9d/src/modules/elastic_net/elastic_net_utils.cpp
----------------------------------------------------------------------
diff --git a/src/modules/elastic_net/elastic_net_utils.cpp b/src/modules/elastic_net/elastic_net_utils.cpp
index ef90dfe..354c216 100644
--- a/src/modules/elastic_net/elastic_net_utils.cpp
+++ b/src/modules/elastic_net/elastic_net_utils.cpp
@@ -15,6 +15,20 @@ using namespace madlib::dbal::eigen_integration;
*/
AnyType __elastic_net_gaussian_predict::run (AnyType& args)
{
+ // throws an exception if the coefficients contain NULL values
+ try {
+ args[0].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ throw std::runtime_error(
+ "Elastic Net error: the coefficients contain NULL values");
+ }
+ // returns NULL if the feature has NULL values
+ try {
+ args[2].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ return Null();
+ }
+
MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
double intercept = args[1].getAs<double>();
MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -30,6 +44,20 @@ AnyType __elastic_net_gaussian_predict::run (AnyType& args)
*/
AnyType __elastic_net_binomial_predict::run (AnyType& args)
{
+ // throws an exception if the coefficients contain NULL values
+ try {
+ args[0].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ throw std::runtime_error(
+ "Elastic Net error: the coefficients contain NULL values");
+ }
+ // returns NULL if the feature has NULL values
+ try {
+ args[2].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ return Null();
+ }
+
MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
double intercept = args[1].getAs<double>();
MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -46,6 +74,20 @@ AnyType __elastic_net_binomial_predict::run (AnyType& args)
*/
AnyType __elastic_net_binomial_prob::run (AnyType& args)
{
+ // throws an exception if the coefficients contain NULL values
+ try {
+ args[0].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ throw std::runtime_error(
+ "Elastic Net error: the coefficients contain NULL values");
+ }
+ // returns NULL if the feature has NULL values
+ try {
+ args[2].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ return Null();
+ }
+
MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
double intercept = args[1].getAs<double>();
MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -61,6 +103,20 @@ AnyType __elastic_net_binomial_prob::run (AnyType& args)
*/
AnyType __elastic_net_binomial_loglikelihood::run (AnyType& args)
{
+ // throws an exception if the coefficients contain NULL values
+ try {
+ args[0].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ throw std::runtime_error(
+ "Elastic Net error: the coefficients contain NULL values");
+ }
+ // returns NULL if the feature has NULL values
+ try {
+ args[3].getAs<MappedColumnVector>();
+ } catch(const ArrayWithNullException &e) {
+ return Null();
+ }
+
MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
double intercept = args[1].getAs<double>();
MappedColumnVector x = args[3].getAs<MappedColumnVector>();
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/7d4eec9d/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
index 643750a..a3ddf34 100644
--- a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
+++ b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
@@ -524,11 +524,39 @@ COPY elastic_type_src (var_int, var_float8, var_sint) FROM stdin DELIMITER ',' N
4, 4.4, 4
\.
+DROP TABLE IF EXISTS housing_test;
+CREATE TABLE housing_test (id serial, x float8[],y float8);
+COPY housing_test (x, y) FROM STDIN NULL '?';
+{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98} 24.00
+{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14} 21.60
+{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03} 34.70
+{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94} 33.40
+{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33} 36.20
+{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21} 28.70
+{1,0.06076,0.00,11.930,0,0.5730,6.9760,91.00,2.1675,1,273.0,21.00,396.90,5.64} 23.90
+{1,0.10959,0.00,11.930,0,0.5730,6.7940,89.30,2.3889,1,273.0,21.00,393.45,6.48} 22.00
+{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88} 11.90
+\.
+
+DROP TABLE IF EXISTS housing_test_null;
+CREATE TABLE housing_test_null (id serial, x float8[],y float8);
+COPY housing_test_null (x, y) FROM STDIN NULL '?';
+{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98} 24.00
+{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14} 21.60
+{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03} 34.70
+{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94} 33.40
+{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33} 36.20
+{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21} 28.70
+{1,0.06076,0.00,11.930,0,0.573, NULL,91.00,2.1675,1,273.0,21.00,396.90,5.64} 23.90
+{1,0.10959,0.00,11.930,0,0.5730,6.7940,89.30,NULL,1,273.0,21.00,393.45,6.48} 22.00
+{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,NULL,273.0,21.00,396.90,7.88} 11.90
+\.
+
create function check_elastic_net ()
returns void as $$
begin
- execute 'drop table if exists house_en';
- perform elastic_net_train(
+ EXECUTE 'drop table if exists house_en';
+ PERFORM elastic_net_train(
'lin_housing_wi',
'house_en',
'y',
@@ -545,7 +573,7 @@ begin
1e-6
);
- perform assert(relative_error(log_likelihood, -14.41122) < 0.000001,
+ PERFORM assert(relative_error(log_likelihood, -14.41122) < 0.000001,
'Elastic Net: log-likelihood mismatch (gaussian)!'
) from house_en;
@@ -555,9 +583,21 @@ begin
'id',
'house_en_pred');
+ EXECUTE 'DROP TABLE IF EXISTS house_test_gaussian';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test',
+ 'id',
+ 'house_test_gaussian');
+
+ EXECUTE 'DROP TABLE IF EXISTS house_test_null_gaussian';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test_null',
+ 'id',
+ 'house_test_null_gaussian');
+
-- huge lamda making all coef to be zeroes
- execute 'drop table if exists house_en';
- perform elastic_net_train(
+ EXECUTE 'drop table if exists house_en';
+ PERFORM elastic_net_train(
'lin_housing_wi',
'house_en',
'y',
@@ -580,8 +620,20 @@ begin
'id',
'house_en_pred');
- execute 'drop table if exists house_en';
- perform elastic_net_train(
+ EXECUTE 'DROP TABLE IF EXISTS house_test_gaussian';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test',
+ 'id',
+ 'house_test_gaussian');
+
+ EXECUTE 'DROP TABLE IF EXISTS house_test_null_gaussian';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test_null',
+ 'id',
+ 'house_test_null_gaussian');
+
+ EXECUTE 'drop table if exists house_en';
+ PERFORM elastic_net_train(
'lin_housing_wi',
'house_en',
'y < 20',
@@ -598,12 +650,24 @@ begin
1e-6
);
- perform assert(relative_error(log_likelihood, -0.542468) < 1e-4,
+ PERFORM assert(relative_error(log_likelihood, -0.542468) < 1e-4,
'Elastic Net: log-likelihood mismatch (use_active_set = f)!'
) from house_en;
- execute 'drop table if exists house_en';
- perform elastic_net_train(
+ EXECUTE 'DROP TABLE IF EXISTS house_test_binomial';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test',
+ 'id',
+ 'house_test_binomial');
+
+ EXECUTE 'drop table if exists house_test_null_binomial';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test_null',
+ 'id',
+ 'house_test_null_binomial');
+
+ EXECUTE 'drop table if exists house_en';
+ PERFORM elastic_net_train(
'lin_housing_wi',
'house_en',
'y < 20',
@@ -620,12 +684,24 @@ begin
1e-6
);
- perform assert(relative_error(log_likelihood, -0.542468) < 1e-4,
+ PERFORM assert(relative_error(log_likelihood, -0.542468) < 1e-4,
'Elastic Net: log-likelihood mismatch (use_active_set = t)!'
) from house_en;
- execute 'DROP TABLE IF EXISTS elastic_type_res';
- perform elastic_net_train('elastic_type_src',
+ EXECUTE 'DROP TABLE IF EXISTS house_test_binomial';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test',
+ 'id',
+ 'house_test_binomial');
+
+ EXECUTE 'drop table if exists house_test_null_binomial';
+ PERFORM elastic_net_predict('house_en',
+ 'housing_test_null',
+ 'id',
+ 'house_test_null_binomial');
+
+ EXECUTE 'DROP TABLE IF EXISTS elastic_type_res';
+ PERFORM elastic_net_train('elastic_type_src',
'elastic_type_res',
'var_int < 0',
'*',