You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/04/29 18:57:22 UTC
[madlib] branch master updated: DL: Trap unsupported options for
compile and fit params
This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new 6cae627 DL: Trap unsupported options for compile and fit params
6cae627 is described below
commit 6cae627d47ea93321962d0b73a20b6333721c8d9
Author: Orhan Kislal <ok...@pivotal.io>
AuthorDate: Mon Apr 29 11:55:12 2019 -0700
DL: Trap unsupported options for compile and fit params
JIRA: MADLIB-1309
This commit adds error messages for the unsupported options for compile
and fit params.
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
.../modules/deep_learning/madlib_keras.py_in | 2 +-
.../deep_learning/madlib_keras_wrapper.py_in | 83 +++++++++++++++++-----
.../modules/deep_learning/test/madlib_keras.sql_in | 17 +++++
.../test/unit_tests/test_madlib_keras.py_in | 38 ++++++++--
4 files changed, 118 insertions(+), 22 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 1668a7b..bd12f55 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -361,7 +361,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
start_fit = time.time()
with K.tf.device(device_name):
#TODO consider not doing this every time
- fit_params = parse_fit_params(fit_params)
+ fit_params = parse_and_validate_fit_params(fit_params)
history = segment_model.fit(x_train, y_train, **fit_params)
loss = history.history['loss'][0]
accuracy = history.history['acc'][0]
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 2e2250e..71c257f 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -19,6 +19,7 @@
import ast
import os
+import plpy
# Do not remove `import keras` although it's not directly used in this file.
# See madlib_keras.py_in for more details
@@ -102,15 +103,15 @@ def convert_string_of_args_to_dict(str_of_args):
elif not stack and char == ",":
value_str = result_str
result_str = ""
- compile_dict[key_str.strip()]=value_str.strip('\'')
+ compile_dict[key_str.strip()]=value_str.strip().strip('\'')
else:
result_str += char
value_str = result_str
- compile_dict[key_str.strip()]=value_str.strip('\'')
+ compile_dict[key_str.strip()]=value_str.strip().strip('\'')
return compile_dict
# Parse the compile parameters and the optimizer.
-def parse_compile_params(str_of_args):
+def parse_and_validate_compile_params(str_of_args):
"""
Args:
@param: str_of_args The string of arguments given by the user
@@ -124,10 +125,14 @@ def parse_compile_params(str_of_args):
opt_name,opt_args = parse_optimizer(compile_dict)
compile_dict['loss'] = parse_loss(compile_dict)
+ literal_eval_compile_params = ['metrics', 'loss_weights',
+ 'weighted_metrics', 'sample_weight_mode']
+ accepted_compile_params = literal_eval_compile_params + ['optimizer', 'loss']
- compile_dict['metrics'] = ast.literal_eval(compile_dict['metrics']) if 'metrics' in compile_dict.keys() else None
- compile_dict['loss_weights'] = ast.literal_eval(compile_dict['loss_weights']) if 'loss_weights' in compile_dict.keys() else None
-
+ compile_dict = validate_and_literal_eval_keys(compile_dict,
+ literal_eval_compile_params,
+ accepted_compile_params)
+ validate_compile_param_types(compile_dict)
return (opt_name,opt_args,compile_dict)
# Parse the optimizer name and params.
@@ -178,11 +183,33 @@ def parse_loss(compile_dict):
return compile_dict['loss']
# Parse the fit parameters into a dictionary.
-def parse_fit_params(str_of_args):
- compile_dict = convert_string_of_args_to_dict(str_of_args)
- for key in compile_dict.keys():
- compile_dict[key] = ast.literal_eval(compile_dict[key])
- return compile_dict
+def parse_and_validate_fit_params(fit_param_str):
+ fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
+
+ literal_eval_fit_params = ['batch_size','epochs','verbose','shuffle',
+ 'class_weight','initial_epoch','steps_per_epoch']
+ accepted_fit_params = literal_eval_fit_params + ['shuffle']
+
+ fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
+ literal_eval_fit_params,
+ accepted_fit_params)
+ return fit_params_dict
+
+# Validate the keys of the given dictionary and run literal_eval on the
+# user-defined subset
+def validate_and_literal_eval_keys(keys_dict, literal_eval_list, accepted_list):
+
+ for ckey in keys_dict.keys():
+ _assert(ckey in accepted_list,
+ "{0} is not accepted as a parameter yet. "
+ "Please review the user docs".format(ckey))
+ if ckey in literal_eval_list:
+ try:
+ keys_dict[ckey] = ast.literal_eval(keys_dict[ckey])
+ except ValueError:
+ plpy.error(("invalid input value for parameter {0}, "
+ "please refer to the documentation").format(ckey))
+ return keys_dict
# Split and strip the whitespace of key=value formatted strings
def split_and_strip(x):
@@ -202,8 +229,32 @@ def get_optimizers():
# Run the keras.compile with the given parameters
def compile_model(model, compile_params):
optimizers = get_optimizers()
- (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
- optimizer = optimizers[opt_name](**final_args) if final_args else opt_name
- model.compile(optimizer=optimizer,
- loss=compile_dict['loss'],
- metrics=compile_dict['metrics'])
+ (opt_name,final_args,compile_dict) = parse_and_validate_compile_params(compile_params)
+ compile_dict['optimizer'] = optimizers[opt_name](**final_args) if final_args else opt_name
+ model.compile(**compile_dict)
+
+def validate_compile_param_types(compile_dict):
+
+ _assert(compile_dict['metrics'] is None or
+ type(compile_dict['metrics']) is list,
+ "wrong input type for compile parameter metrics: multi-output model"
+ "and user defined metrics are not supported yet, please pass a list")
+
+ _assert('loss_weights' not in compile_dict.keys() or
+ compile_dict['loss_weights'] is None or
+ type(compile_dict['loss_weights']) is list or
+ type(compile_dict['loss_weights']) is dict,
+ "wrong input type for compile parameter loss_weights: only list "
+ "and dictionary are supported for now")
+
+ _assert('weighted_metrics' not in compile_dict.keys() or
+ compile_dict['weighted_metrics'] is None or
+ type(compile_dict['weighted_metrics']) is list,
+ "wrong input type for compile parameter weighted_metrics: only list "
+ "is supported for now")
+
+ _assert('sample_weight_mode' not in compile_dict.keys() or
+ compile_dict['sample_weight_mode'] is None or
+ compile_dict['sample_weight_mode'] == "temporal",
+ """compile parameter sample_weight_mode can only be "temporal" or None""")
+
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 8b68aa9..3d1f8d7 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -231,6 +231,7 @@ select assert(trap_error($TRAP$madlib_keras_predict(
'cifar10_predict');$TRAP$) = 1,
'Passing batched image table to predict should error out.');
+-- Compile and fit parameter tests
DROP TABLE IF EXISTS keras_out, keras_out_summary;
SELECT madlib_keras_fit(
'cifar_10_sample_batched',
@@ -271,6 +272,22 @@ SELECT madlib_keras_fit(
1,
$$ optimizer=Adam(epsilon=None), loss=losses.categorical_crossentropy, metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
+
+ 1,
+ FALSE,
+ NULL,
+ 'model name', 'model desc');
+
+DROP TABLE IF EXISTS keras_out, keras_out_summary;
+SELECT madlib_keras_fit(
+ 'cifar_10_sample_batched',
+ 'keras_out',
+ 'dependent_var',
+ 'independent_var',
+ 'model_arch',
+ 1,
+ $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), metrics=['accuracy'], loss_weights=[2], sample_weight_mode=None, loss='categorical_crossentropy' $$::text,
+ $$ epochs=10, verbose=0, shuffle=True, initial_epoch=1, steps_per_epoch=2 $$::text,
1,
FALSE,
NULL,
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 2a9a427..0c4072b 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -241,11 +241,19 @@ class MadlibKerasFitTestCase(unittest.TestCase):
def test_split_and_strip(self):
self.assertEqual(('a','b'), self.subject.split_and_strip(' a = b '))
- def test_parse_fit_params(self):
+ def test_parse_and_validate_fit_params(self):
result = {'batch_size':2, 'epochs':1, 'verbose':0}
- self.assertDictEqual(result, self.subject.parse_fit_params('batch_size=2, epochs=1, verbose=0'))
+ self.assertDictEqual(result, self.subject.parse_and_validate_fit_params('batch_size=2, epochs=1, verbose=0'))
def test_parse_optimizer(self):
+
+ import keras.losses as losses
+
+ loss_func = losses.categorical_crossentropy
+ compile_dict = {'loss':'losses.categorical_crossentropy'}
+ self.assertEqual(self.subject.parse_loss(compile_dict), loss_func)
+
+ def test_parse_loss(self):
opt_name = 'SGD'
final_args = {'lr':0.01, 'decay':1e-6, 'nesterov':True}
compile_dict = {}
@@ -255,13 +263,33 @@ class MadlibKerasFitTestCase(unittest.TestCase):
self.assertEqual(result_name, opt_name)
self.assertDictEqual(result_params, final_args)
- def test_parse_compile_params(self):
+ def test_parse_and_validate_compile_params(self):
test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']"
- compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 'metrics':['accuracy'], 'loss':'categorical_crossentropy', 'loss_weights': None}
- opt_name,opt_args,result_params = self.subject.parse_compile_params(test_str)
+ compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 'metrics':['accuracy'], 'loss':'categorical_crossentropy'}
+ opt_name,opt_args,result_params = self.subject.parse_and_validate_compile_params(test_str)
self.assertDictEqual(result_params, compile_dict)
+ def test_parse_and_validate_fit_params(self):
+
+ test_str = "batch_size=2, epochs=1, verbose=0"
+ fit_dict = {'batch_size':2, 'epochs':1, 'verbose':0}
+ result_params = self.subject.parse_and_validate_fit_params(test_str)
+ self.assertDictEqual(result_params, fit_dict)
+
+ def test_validate_and_literal_eval_keys(self):
+
+ test_dict = {'batch_size':'2', 'epochs':'1', 'verbose':'0'}
+ target_dict = {'batch_size':2, 'epochs':1, 'verbose':0}
+ literal_eval_fit_params = ['batch_size','epochs','verbose','shuffle',
+ 'class_weight','initial_epoch','steps_per_epoch']
+ accepted_fit_params = literal_eval_fit_params + ['shuffle']
+ result_params = self.subject.validate_and_literal_eval_keys(
+ test_dict,
+ literal_eval_fit_params,
+ accepted_fit_params)
+ self.assertDictEqual(result_params, target_dict)
+
class MadlibKerasValidatorTestCase(unittest.TestCase):
def setUp(self):
self.plpy_mock = Mock(spec='error')