You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by kh...@apache.org on 2020/03/16 17:36:18 UTC

[madlib] 04/04: DL: Fix fit multiple to create summary tables with class_values including NULL

This is an automated email from the ASF dual-hosted git repository.

khannaekta pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 7e4443bcee31a58a4dd4e9aea5b091bbdaa06759
Author: Ekta Khanna <ek...@pivotal.io>
AuthorDate: Wed Mar 11 13:41:32 2020 -0700

    DL: Fix fit multiple to create summary tables with class_values including NULL
    
    Prior to this commit, because Python represents SQL NULL elements of
    an array as None, creating the summary table from a class_values
    array that contained NULL would fail. This commit fixes that issue by
    passing class_values as a parameter to a prepared statement.
---
 .../madlib_keras_fit_multiple_model.py_in           | 12 +++++-------
 .../test/madlib_keras_model_selection.sql_in        | 21 +++++++++++++++++++++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 93a86f9..a32421b 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -388,6 +388,7 @@ class FitMultipleModel():
             plpy.execute("DROP TABLE {0}".format(self.model_summary_table))
         src_summary_dict = get_source_summary_table_dict(self.fit_validator_train)
         class_values = src_summary_dict['class_values']
+        class_values_type = src_summary_dict['class_values_type']
         dep_vartype = src_summary_dict['dep_vartype']
         dependent_varname = \
             src_summary_dict['dependent_varname_in_source_table']
@@ -397,11 +398,8 @@ class FitMultipleModel():
         self.validation_table = 'NULL' if self.validation_table is None \
             else '$MAD${0}$MAD$'.format(self.validation_table)
         if class_values is None:
-            class_values_str = 'NULL::{0}'.format(src_summary_dict['class_values_type'])
             num_classes = 'NULL'
         else:
-            class_values_str = 'ARRAY{0}::{1}'.format(class_values,
-                                                      src_summary_dict['class_values_type'])
             num_classes = len(class_values)
         name = 'NULL' if self.name is None else '$MAD${0}$MAD$'.format(self.name)
         descr = 'NULL' if self.description is None else '$MAD${0}$MAD$'.format(self.description)
@@ -410,7 +408,7 @@ class FitMultipleModel():
         dependent_vartype_colname = DEPENDENT_VARTYPE_COLNAME
         normalizing_const_colname = NORMALIZING_CONST_COLNAME
         float32_sql_type = FLOAT32_SQL_TYPE
-        update_query = """
+        create_query = plpy.prepare("""
                 CREATE TABLE {self.model_summary_table} AS
                 SELECT
                     $MAD${self.source_table}$MAD$::TEXT AS source_table,
@@ -429,12 +427,12 @@ class FitMultipleModel():
                     '{self.end_training_time}'::TIMESTAMP AS end_training_time,
                     '{self.version}'::TEXT AS madlib_version,
                     {num_classes}::INTEGER AS num_classes,
-                    {class_values_str} AS {class_values_colname},
+                    $1 AS {class_values_colname},
                     $MAD${dep_vartype}$MAD$::TEXT AS {dependent_vartype_colname},
                     {norm_const}::{float32_sql_type} AS {normalizing_const_colname},
                     ARRAY{metrics_iters}::INTEGER[] AS metrics_iters
-            """.format(**locals())
-        plpy.execute(update_query)
+            """.format(**locals()), [class_values_type])
+        plpy.execute(create_query, [class_values])
 
     def update_info_table(self, mst, is_train):
         mst_key = mst[self.mst_key_col]
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index 26c1a34..ddf2e0f 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -367,4 +367,25 @@ SELECT assert(cnt = 1,
 FROM (SELECT count(*) cnt FROM iris_multiple_model_info
 WHERE compile_params = $MAD$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)', metrics=['accuracy']$MAD$::text
 AND fit_params = $MAD$batch_size=32, epochs=1$MAD$::text) info;
+
+-- Test when class values have NULL values
+UPDATE iris_data_packed_summary SET class_values = ARRAY['Iris-setosa','Iris-versicolor',NULL];
+DROP TABLE if exists iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
+SELECT madlib_keras_fit_multiple_model(
+	'iris_data_packed',
+	'iris_multiple_model',
+	'mst_table_1row',
+	1,
+	FALSE,
+	NULL,
+	1,
+	FALSE
+);
+
+SELECT assert(
+        num_classes = 3 AND
+        class_values = '{Iris-setosa,Iris-versicolor,NULL}',
+        'Keras Fit Multiple num_clases and class values Validation failed. Actual:' || __to_char(summary))
+FROM (SELECT * FROM iris_multiple_model_summary) summary;
+
 !>)