You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2019/04/09 21:29:05 UTC

[madlib] branch master updated: DL: Add new columns in fit output summary table.

This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new aa77914  DL: Add new columns in fit output summary table.
aa77914 is described below

commit aa77914790cec049ab51b3230077e2be62616787
Author: Domino Valdano <dv...@pivotal.io>
AuthorDate: Mon Apr 8 17:33:46 2019 -0700

    DL: Add new columns in fit output summary table.
    
    JIRA: MADLIB-1319
    
    Columns `dependent_vartype` and `normalizing_const` are useful to have
    in the model summary table since they can be used in predict.
    1) For instance, `dependent_vartype` will be useful in
    https://issues.apache.org/jira/browse/MADLIB-1315 while creating the
    prediction output column. For a response-type prediction, the output
    column should have the same type as the dependent variable in the
    training data.
    2) While predicting, `normalizing_const` will be useful to normalize the
    test data with the same normalizing constant that was applied to the
    training data.
    
    Closes #365
    
    Co-authored-by: Nandish Jayaram <nj...@apache.org>
---
 .../modules/deep_learning/madlib_keras.py_in       | 25 ++++++++++++++++------
 .../deep_learning/madlib_keras_helper.py_in        | 11 +++-------
 .../deep_learning/madlib_keras_predict.py_in       |  5 +++--
 .../modules/deep_learning/test/madlib_keras.sql_in |  2 ++
 .../postgres/modules/utilities/validate_args.py_in | 18 ++++++++++++++++
 5 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 364e9d6..55892d2 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -32,14 +32,16 @@ from keras.optimizers import *
 from keras.regularizers import *
 
 from madlib_keras_helper import CLASS_VALUES_COLNAME
+from madlib_keras_helper import DEPENDENT_VARTYPE
+from madlib_keras_helper import NORMALIZING_CONST_COLNAME
 from madlib_keras_helper import FitInputValidator
-from madlib_keras_helper import get_class_values_and_type
 from madlib_keras_helper import get_data_as_np_array
 from madlib_keras_wrapper import *
 
 from utilities.model_arch_info import get_input_shape
 from utilities.model_arch_info import get_num_classes
 from utilities.utilities import madlib_version
+from utilities.validate_args import get_col_value_and_type
 
 def fit(schema_madlib, source_table, model, dependent_varname,
         independent_varname, model_arch_table, model_arch_id, compile_params,
@@ -193,8 +195,12 @@ def fit(schema_madlib, source_table, model, dependent_varname,
     if validation_aggregate_loss and len(validation_aggregate_loss) > 0:
         final_validation_loss = validation_aggregate_loss[-1]
     version = madlib_version(schema_madlib)
-    class_values, class_values_type = get_class_values_and_type(
-        fit_validator.source_summary_table)
+    class_values, class_values_type = get_col_value_and_type(
+        fit_validator.source_summary_table, CLASS_VALUES_COLNAME)
+    norm_const, norm_const_type = get_col_value_and_type(
+        fit_validator.source_summary_table, NORMALIZING_CONST_COLNAME)
+    dep_vartype = plpy.execute("SELECT {0} AS dep FROM {1}".format(
+        DEPENDENT_VARTYPE, fit_validator.source_summary_table))[0]['dep']
     create_output_summary_table = plpy.prepare("""
         CREATE TABLE {0}_summary AS
         SELECT
@@ -225,8 +231,11 @@ def fit(schema_madlib, source_table, model, dependent_varname,
         $25 AS loss_validation,
         $26 AS accuracy_iter_validation,
         $27 AS loss_iter_validation,
-        $28 AS {1}
-        """.format(model, CLASS_VALUES_COLNAME),
+        $28 AS {1},
+        $29 AS {2},
+        $30 AS {3}
+        """.format(model, CLASS_VALUES_COLNAME, DEPENDENT_VARTYPE,
+                   NORMALIZING_CONST_COLNAME),
                    ["TEXT", "INTEGER", "TEXT", "TIMESTAMP",
                     "TIMESTAMP", "TEXT", "TEXT","TEXT",
                     "TEXT", "TEXT", "TEXT", "TEXT", "INTEGER",
@@ -236,7 +245,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
                     "DOUBLE PRECISION[]", "TIMESTAMP[]",
                     "DOUBLE PRECISION", "DOUBLE PRECISION",
                     "DOUBLE PRECISION[]", "DOUBLE PRECISION[]",
-                    class_values_type])
+                    class_values_type, "TEXT", norm_const_type])
     plpy.execute(
         create_output_summary_table,
         [
@@ -255,7 +264,9 @@ def fit(schema_madlib, source_table, model, dependent_varname,
             final_validation_loss,
             validation_aggregate_accuracy,
             validation_aggregate_loss,
-            class_values
+            class_values,
+            dep_vartype,
+            norm_const
         ]
         )
 
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index bd3963e..3313dd3 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -197,6 +197,9 @@ def get_data_as_np_array(table_name, y, x, input_shape, num_classes):
     return x_validation, y_validation
 
 CLASS_VALUES_COLNAME = "class_values"
+NORMALIZING_CONST_COLNAME = "normalizing_const"
+DEPENDENT_VARTYPE = "dependent_vartype"
+
 class FitInputValidator:
     def __init__(self, source_table, validation_table, output_model_table,
                  model_arch_table, dependent_varname, independent_varname,
@@ -299,11 +302,3 @@ class FitInputValidator:
                     " {2} in table {3}.".format(
                         input_shape, input_shape_from_table,
                         self.independent_varname, table))
-
-def get_class_values_and_type(source_summary_table):
-    class_values = plpy.execute("SELECT {0} AS class_values FROM {1}".
-        format(CLASS_VALUES_COLNAME, source_summary_table)
-        )[0]['class_values']
-    class_values_type = get_expr_type(CLASS_VALUES_COLNAME,
-                                      source_summary_table)
-    return class_values, class_values_type
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 6b1dbc6..bf14d1e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -34,7 +34,7 @@ from utilities.validate_args import output_tbl_valid
 
 from madlib_keras_wrapper import compile_and_set_weights
 from madlib_keras_wrapper import convert_string_of_args_to_dict
-from madlib_keras_helper import get_class_values_and_type
+from madlib_keras_helper import CLASS_VALUES_COLNAME
 from madlib_keras_helper import KerasWeightsSerializer
 
 def predict(schema_madlib, model_table, test_table, id_col, model_arch_table,
@@ -60,7 +60,8 @@ def predict(schema_madlib, model_table, test_table, id_col, model_arch_table,
     input_shape = get_input_shape(model_arch)
     compile_params = "$madlib$" + compile_params + "$madlib$"
     model_summary_table = add_postfix(model_table, "_summary")
-    class_values, _ = get_class_values_and_type(model_summary_table)
+    class_values = plpy.execute("SELECT {0} AS cv FROM {1}".format(
+        CLASS_VALUES_COLNAME, model_summary_table))[0]['cv']
     predict_query = plpy.prepare("""
         CREATE TABLE {output_table} AS
         SELECT {id_col},
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 7865e55..a1743c9 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -85,7 +85,9 @@ SELECT assert(
         validation_table = 'cifar_10_sample_batched' AND
         model = 'keras_saved_out' AND
         dependent_varname = 'dependent_var' AND
+        dependent_vartype = 'smallint' AND
         independent_varname = 'independent_var' AND
+        normalizing_const = 255.0 AND
         name is NULL AND
         description is NULL AND
         model_size > 0 AND
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index ba7e960..b35e7ad 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -365,6 +365,24 @@ def get_cols_and_types(tbl):
     return list(zip(col_names, col_types))
 # -------------------------------------------------------------------------
 
+def get_col_value_and_type(table_name, column_name):
+    """
+        Return the value and type of a column from a table.
+        Args:
+            @param table_name
+            @param column_name
+        Returns column_value, column_type
+    """
+    if table_name is None or table_name.lower() == 'null':
+        plpy.error('Input error: Table name (NULL) is invalid.')
+    if not is_var_valid(table_name, column_name):
+        plpy.error('Input error: Column name is invalid.')
+    value = plpy.execute("SELECT {0} AS value FROM {1}".
+        format(column_name, table_name)
+        )[0]['value']
+    col_type = get_expr_type(column_name, table_name)
+    return value, col_type
+# -------------------------------------------------------------------------
 
 def get_expr_type(expressions, tbl):
     """ Return the type of a multiple expressions run on a given table