You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2019/04/09 21:29:05 UTC
[madlib] branch master updated: DL: Add new columns in fit output summary table.
This is an automated email from the ASF dual-hosted git repository.
njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new aa77914 DL: Add new columns in fit output summary table.
aa77914 is described below
commit aa77914790cec049ab51b3230077e2be62616787
Author: Domino Valdano <dv...@pivotal.io>
AuthorDate: Mon Apr 8 17:33:46 2019 -0700
DL: Add new columns in fit output summary table.
JIRA: MADLIB-1319
Columns `dependent_vartype` and `normalizing_const` are useful to have
in the model summary table since they can be used in predict.
1) For instance, `dependent_vartype` will be useful in
https://issues.apache.org/jira/browse/MADLIB-1315 when creating the
prediction output column. For a response-type prediction, the output column
should be of the same type as the dependent variable in the training data.
2) During prediction, `normalizing_const` will be used to normalize the
test data with the same normalizing constant that was used to normalize
the training data.
Closes #365
Co-authored-by: Nandish Jayaram <nj...@apache.org>
---
.../modules/deep_learning/madlib_keras.py_in | 25 ++++++++++++++++------
.../deep_learning/madlib_keras_helper.py_in | 11 +++-------
.../deep_learning/madlib_keras_predict.py_in | 5 +++--
.../modules/deep_learning/test/madlib_keras.sql_in | 2 ++
.../postgres/modules/utilities/validate_args.py_in | 18 ++++++++++++++++
5 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 364e9d6..55892d2 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -32,14 +32,16 @@ from keras.optimizers import *
from keras.regularizers import *
from madlib_keras_helper import CLASS_VALUES_COLNAME
+from madlib_keras_helper import DEPENDENT_VARTYPE
+from madlib_keras_helper import NORMALIZING_CONST_COLNAME
from madlib_keras_helper import FitInputValidator
-from madlib_keras_helper import get_class_values_and_type
from madlib_keras_helper import get_data_as_np_array
from madlib_keras_wrapper import *
from utilities.model_arch_info import get_input_shape
from utilities.model_arch_info import get_num_classes
from utilities.utilities import madlib_version
+from utilities.validate_args import get_col_value_and_type
def fit(schema_madlib, source_table, model, dependent_varname,
independent_varname, model_arch_table, model_arch_id, compile_params,
@@ -193,8 +195,12 @@ def fit(schema_madlib, source_table, model, dependent_varname,
if validation_aggregate_loss and len(validation_aggregate_loss) > 0:
final_validation_loss = validation_aggregate_loss[-1]
version = madlib_version(schema_madlib)
- class_values, class_values_type = get_class_values_and_type(
- fit_validator.source_summary_table)
+ class_values, class_values_type = get_col_value_and_type(
+ fit_validator.source_summary_table, CLASS_VALUES_COLNAME)
+ norm_const, norm_const_type = get_col_value_and_type(
+ fit_validator.source_summary_table, NORMALIZING_CONST_COLNAME)
+ dep_vartype = plpy.execute("SELECT {0} AS dep FROM {1}".format(
+ DEPENDENT_VARTYPE, fit_validator.source_summary_table))[0]['dep']
create_output_summary_table = plpy.prepare("""
CREATE TABLE {0}_summary AS
SELECT
@@ -225,8 +231,11 @@ def fit(schema_madlib, source_table, model, dependent_varname,
$25 AS loss_validation,
$26 AS accuracy_iter_validation,
$27 AS loss_iter_validation,
- $28 AS {1}
- """.format(model, CLASS_VALUES_COLNAME),
+ $28 AS {1},
+ $29 AS {2},
+ $30 AS {3}
+ """.format(model, CLASS_VALUES_COLNAME, DEPENDENT_VARTYPE,
+ NORMALIZING_CONST_COLNAME),
["TEXT", "INTEGER", "TEXT", "TIMESTAMP",
"TIMESTAMP", "TEXT", "TEXT","TEXT",
"TEXT", "TEXT", "TEXT", "TEXT", "INTEGER",
@@ -236,7 +245,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
"DOUBLE PRECISION[]", "TIMESTAMP[]",
"DOUBLE PRECISION", "DOUBLE PRECISION",
"DOUBLE PRECISION[]", "DOUBLE PRECISION[]",
- class_values_type])
+ class_values_type, "TEXT", norm_const_type])
plpy.execute(
create_output_summary_table,
[
@@ -255,7 +264,9 @@ def fit(schema_madlib, source_table, model, dependent_varname,
final_validation_loss,
validation_aggregate_accuracy,
validation_aggregate_loss,
- class_values
+ class_values,
+ dep_vartype,
+ norm_const
]
)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index bd3963e..3313dd3 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -197,6 +197,9 @@ def get_data_as_np_array(table_name, y, x, input_shape, num_classes):
return x_validation, y_validation
CLASS_VALUES_COLNAME = "class_values"
+NORMALIZING_CONST_COLNAME = "normalizing_const"
+DEPENDENT_VARTYPE = "dependent_vartype"
+
class FitInputValidator:
def __init__(self, source_table, validation_table, output_model_table,
model_arch_table, dependent_varname, independent_varname,
@@ -299,11 +302,3 @@ class FitInputValidator:
" {2} in table {3}.".format(
input_shape, input_shape_from_table,
self.independent_varname, table))
-
-def get_class_values_and_type(source_summary_table):
- class_values = plpy.execute("SELECT {0} AS class_values FROM {1}".
- format(CLASS_VALUES_COLNAME, source_summary_table)
- )[0]['class_values']
- class_values_type = get_expr_type(CLASS_VALUES_COLNAME,
- source_summary_table)
- return class_values, class_values_type
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 6b1dbc6..bf14d1e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -34,7 +34,7 @@ from utilities.validate_args import output_tbl_valid
from madlib_keras_wrapper import compile_and_set_weights
from madlib_keras_wrapper import convert_string_of_args_to_dict
-from madlib_keras_helper import get_class_values_and_type
+from madlib_keras_helper import CLASS_VALUES_COLNAME
from madlib_keras_helper import KerasWeightsSerializer
def predict(schema_madlib, model_table, test_table, id_col, model_arch_table,
@@ -60,7 +60,8 @@ def predict(schema_madlib, model_table, test_table, id_col, model_arch_table,
input_shape = get_input_shape(model_arch)
compile_params = "$madlib$" + compile_params + "$madlib$"
model_summary_table = add_postfix(model_table, "_summary")
- class_values, _ = get_class_values_and_type(model_summary_table)
+ class_values = plpy.execute("SELECT {0} AS cv FROM {1}".format(
+ CLASS_VALUES_COLNAME, model_summary_table))[0]['cv']
predict_query = plpy.prepare("""
CREATE TABLE {output_table} AS
SELECT {id_col},
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 7865e55..a1743c9 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -85,7 +85,9 @@ SELECT assert(
validation_table = 'cifar_10_sample_batched' AND
model = 'keras_saved_out' AND
dependent_varname = 'dependent_var' AND
+ dependent_vartype = 'smallint' AND
independent_varname = 'independent_var' AND
+ normalizing_const = 255.0 AND
name is NULL AND
description is NULL AND
model_size > 0 AND
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index ba7e960..b35e7ad 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -365,6 +365,24 @@ def get_cols_and_types(tbl):
return list(zip(col_names, col_types))
# -------------------------------------------------------------------------
+def get_col_value_and_type(table_name, column_name):
+ """
+ Return the value and type of a column from a table.
+ Args:
+ @param table_name
+ @param column_name
+ Returns column_value, column_type
+ """
+ if table_name is None or table_name.lower() == 'null':
+ plpy.error('Input error: Table name (NULL) is invalid.')
+ if not is_var_valid(table_name, column_name):
+ plpy.error('Input error: Column name is invalid.')
+ value = plpy.execute("SELECT {0} AS value FROM {1}".
+ format(column_name, table_name)
+ )[0]['value']
+ col_type = get_expr_type(column_name, table_name)
+ return value, col_type
+# -------------------------------------------------------------------------
def get_expr_type(expressions, tbl):
""" Return the type of a multiple expressions run on a given table