You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2016/03/26 00:49:01 UTC

[2/3] incubator-madlib git commit: Elastic Net Predict: Skip arrays with NULL values

Elastic Net Predict: Skip arrays with NULL values

Jira: MADLIB-919

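Having NULL values in the input array led to an error while converting
the array to a MappedColumnVector. This fix skips prediction (returns
NULL) for feature arrays with NULL values, and raises an explicit
error when the coefficient array contains NULL values.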

Closes #31


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/7d4eec9d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/7d4eec9d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/7d4eec9d

Branch: refs/heads/master
Commit: 7d4eec9d443c7aef8a63565d520dee0ba773d929
Parents: 62a99ce
Author: Nandish Jayaram <nj...@pivotal.io>
Authored: Fri Mar 11 14:58:48 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Fri Mar 25 16:48:32 2016 -0700

----------------------------------------------------------------------
 src/modules/elastic_net/elastic_net_utils.cpp   |  56 ++++++++++
 .../test/elastic_net_install_check.sql_in       | 102 ++++++++++++++++---
 2 files changed, 145 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/7d4eec9d/src/modules/elastic_net/elastic_net_utils.cpp
----------------------------------------------------------------------
diff --git a/src/modules/elastic_net/elastic_net_utils.cpp b/src/modules/elastic_net/elastic_net_utils.cpp
index ef90dfe..354c216 100644
--- a/src/modules/elastic_net/elastic_net_utils.cpp
+++ b/src/modules/elastic_net/elastic_net_utils.cpp
@@ -15,6 +15,20 @@ using namespace madlib::dbal::eigen_integration;
  */
 AnyType __elastic_net_gaussian_predict::run (AnyType& args)
 {
+    // throws an exception if the coefficients contain NULL values
+    try {
+        args[0].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        throw std::runtime_error(
+            "Elastic Net error: the coefficients contain NULL values");
+    }
+    // returns NULL if the feature has NULL values
+    try {
+        args[2].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        return Null();
+    }
+
     MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
     double intercept = args[1].getAs<double>();
     MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -30,6 +44,20 @@ AnyType __elastic_net_gaussian_predict::run (AnyType& args)
 */
 AnyType __elastic_net_binomial_predict::run (AnyType& args)
 {
+    // throws an exception if the coefficients contain NULL values
+    try {
+        args[0].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        throw std::runtime_error(
+            "Elastic Net error: the coefficients contain NULL values");
+    }
+    // returns NULL if the feature has NULL values
+    try {
+        args[2].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        return Null();
+    }
+
     MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
     double intercept = args[1].getAs<double>();
     MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -46,6 +74,20 @@ AnyType __elastic_net_binomial_predict::run (AnyType& args)
 */
 AnyType __elastic_net_binomial_prob::run (AnyType& args)
 {
+    // throws an exception if the coefficients contain NULL values
+    try {
+        args[0].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        throw std::runtime_error(
+            "Elastic Net error: the coefficients contain NULL values");
+    }
+    // returns NULL if the feature has NULL values
+    try {
+        args[2].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        return Null();
+    }
+
     MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
     double intercept = args[1].getAs<double>();
     MappedColumnVector x = args[2].getAs<MappedColumnVector>();
@@ -61,6 +103,20 @@ AnyType __elastic_net_binomial_prob::run (AnyType& args)
 */
 AnyType __elastic_net_binomial_loglikelihood::run (AnyType& args)
 {
+    // throws an exception if the coefficients contain NULL values
+    try {
+        args[0].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        throw std::runtime_error(
+            "Elastic Net error: the coefficients contain NULL values");
+    }
+    // returns NULL if the feature has NULL values
+    try {
+        args[3].getAs<MappedColumnVector>();
+    } catch(const ArrayWithNullException &e) {
+        return Null();
+    }
+
     MappedColumnVector coef = args[0].getAs<MappedColumnVector>();
     double intercept = args[1].getAs<double>();
     MappedColumnVector x = args[3].getAs<MappedColumnVector>();

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/7d4eec9d/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
index 643750a..a3ddf34 100644
--- a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
+++ b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
@@ -524,11 +524,39 @@ COPY elastic_type_src (var_int, var_float8, var_sint) FROM stdin DELIMITER ',' N
 4, 4.4, 4
 \.
 
+DROP TABLE IF EXISTS housing_test;
+CREATE TABLE housing_test (id serial, x float8[],y float8);
+COPY housing_test (x, y) FROM STDIN NULL '?';
+{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98}	24.00
+{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14}	21.60
+{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03}	34.70
+{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94}	33.40
+{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33}	36.20
+{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21}	28.70
+{1,0.06076,0.00,11.930,0,0.5730,6.9760,91.00,2.1675,1,273.0,21.00,396.90,5.64}	23.90
+{1,0.10959,0.00,11.930,0,0.5730,6.7940,89.30,2.3889,1,273.0,21.00,393.45,6.48}	22.00
+{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88}	11.90
+\.
+
+DROP TABLE IF EXISTS housing_test_null;
+CREATE TABLE housing_test_null (id serial, x float8[],y float8);
+COPY housing_test_null (x, y) FROM STDIN NULL '?';
+{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98}	24.00
+{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14}	21.60
+{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03}	34.70
+{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94}	33.40
+{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33}	36.20
+{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21}	28.70
+{1,0.06076,0.00,11.930,0,0.573, NULL,91.00,2.1675,1,273.0,21.00,396.90,5.64}	23.90
+{1,0.10959,0.00,11.930,0,0.5730,6.7940,89.30,NULL,1,273.0,21.00,393.45,6.48}	22.00
+{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,NULL,273.0,21.00,396.90,7.88}	11.90
+\.
+
 create function check_elastic_net ()
 returns void as $$
 begin
-    execute 'drop table if exists house_en';
-    perform elastic_net_train(
+    EXECUTE 'drop table if exists house_en';
+    PERFORM elastic_net_train(
         'lin_housing_wi',
         'house_en',
         'y',
@@ -545,7 +573,7 @@ begin
         1e-6
     );
 
-    perform assert(relative_error(log_likelihood, -14.41122) < 0.000001,
+    PERFORM assert(relative_error(log_likelihood, -14.41122) < 0.000001,
         'Elastic Net: log-likelihood mismatch (gaussian)!'
     ) from house_en;
 
@@ -555,9 +583,21 @@ begin
                                 'id',
                                 'house_en_pred');
 
+    EXECUTE 'DROP TABLE IF EXISTS house_test_gaussian';
+    PERFORM elastic_net_predict('house_en',
+                                'housing_test',
+                                'id',
+                                'house_test_gaussian');
+
+	EXECUTE 'DROP TABLE IF EXISTS house_test_null_gaussian';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test_null',
+                                'id',
+                                'house_test_null_gaussian');
+
     -- huge lamda making all coef to be zeroes
-    execute 'drop table if exists house_en';
-    perform elastic_net_train(
+    EXECUTE 'drop table if exists house_en';
+    PERFORM elastic_net_train(
         'lin_housing_wi',
         'house_en',
         'y',
@@ -580,8 +620,20 @@ begin
                                 'id',
                                 'house_en_pred');
 
-    execute 'drop table if exists house_en';
-    perform elastic_net_train(
+    EXECUTE 'DROP TABLE IF EXISTS house_test_gaussian';
+    PERFORM elastic_net_predict('house_en',
+                                'housing_test',
+                                'id',
+                                'house_test_gaussian');
+
+    EXECUTE 'DROP TABLE IF EXISTS house_test_null_gaussian';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test_null',
+                                'id',
+                                'house_test_null_gaussian');
+
+    EXECUTE 'drop table if exists house_en';
+    PERFORM elastic_net_train(
         'lin_housing_wi',
         'house_en',
         'y < 20',
@@ -598,12 +650,24 @@ begin
         1e-6
     );
 
-    perform assert(relative_error(log_likelihood, -0.542468) < 1e-4,
+    PERFORM assert(relative_error(log_likelihood, -0.542468) < 1e-4,
         'Elastic Net: log-likelihood mismatch (use_active_set = f)!'
     ) from house_en;
 
-    execute 'drop table if exists house_en';
-    perform elastic_net_train(
+    EXECUTE 'DROP TABLE IF EXISTS house_test_binomial';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test',
+                                'id',
+                                'house_test_binomial');
+
+	EXECUTE 'drop table if exists house_test_null_binomial';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test_null',
+                                'id',
+                                'house_test_null_binomial');
+
+    EXECUTE 'drop table if exists house_en';
+    PERFORM elastic_net_train(
         'lin_housing_wi',
         'house_en',
         'y < 20',
@@ -620,12 +684,24 @@ begin
         1e-6
     );
 
-    perform assert(relative_error(log_likelihood, -0.542468) < 1e-4,
+    PERFORM assert(relative_error(log_likelihood, -0.542468) < 1e-4,
         'Elastic Net: log-likelihood mismatch (use_active_set = t)!'
     ) from house_en;
 
-    execute 'DROP TABLE IF EXISTS elastic_type_res';
-    perform elastic_net_train('elastic_type_src',
+    EXECUTE 'DROP TABLE IF EXISTS house_test_binomial';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test',
+                                'id',
+                                'house_test_binomial');
+
+	EXECUTE 'drop table if exists house_test_null_binomial';
+	PERFORM elastic_net_predict('house_en',
+                                'housing_test_null',
+                                'id',
+                                'house_test_null_binomial');
+
+    EXECUTE 'DROP TABLE IF EXISTS elastic_type_res';
+    PERFORM elastic_net_train('elastic_type_src',
 		'elastic_type_res',
 		'var_int < 0',
 		'*',