You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/04/29 18:57:22 UTC

[madlib] branch master updated: DL: Trap unsupported options for compile and fit params

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new 6cae627  DL: Trap unsupported options for compile and fit params
6cae627 is described below

commit 6cae627d47ea93321962d0b73a20b6333721c8d9
Author: Orhan Kislal <ok...@pivotal.io>
AuthorDate: Mon Apr 29 11:55:12 2019 -0700

    DL: Trap unsupported options for compile and fit params
    
    JIRA: MADLIB-1309
    
    This commit adds error messages for the unsupported options for compile
    and fit params.
    
    Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
 .../modules/deep_learning/madlib_keras.py_in       |  2 +-
 .../deep_learning/madlib_keras_wrapper.py_in       | 83 +++++++++++++++++-----
 .../modules/deep_learning/test/madlib_keras.sql_in | 17 +++++
 .../test/unit_tests/test_madlib_keras.py_in        | 38 ++++++++--
 4 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 1668a7b..bd12f55 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -361,7 +361,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
     start_fit = time.time()
     with K.tf.device(device_name):
         #TODO consider not doing this every time
-        fit_params = parse_fit_params(fit_params)
+        fit_params = parse_and_validate_fit_params(fit_params)
         history = segment_model.fit(x_train, y_train, **fit_params)
         loss = history.history['loss'][0]
         accuracy = history.history['acc'][0]
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 2e2250e..71c257f 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -19,6 +19,7 @@
 
 import ast
 import os
+import plpy
 
 # Do not remove `import keras` although it's not directly used in this file.
 # See madlib_keras.py_in for more details
@@ -102,15 +103,15 @@ def convert_string_of_args_to_dict(str_of_args):
         elif not stack and char == ",":
             value_str = result_str
             result_str = ""
-            compile_dict[key_str.strip()]=value_str.strip('\'')
+            compile_dict[key_str.strip()]=value_str.strip().strip('\'')
         else:
             result_str += char
     value_str = result_str
-    compile_dict[key_str.strip()]=value_str.strip('\'')
+    compile_dict[key_str.strip()]=value_str.strip().strip('\'')
     return compile_dict
 
 # Parse the compile parameters and the optimizer.
-def parse_compile_params(str_of_args):
+def parse_and_validate_compile_params(str_of_args):
     """
     Args:
         @param: str_of_args     The string of arguments given by the user
@@ -124,10 +125,14 @@ def parse_compile_params(str_of_args):
 
     opt_name,opt_args = parse_optimizer(compile_dict)
     compile_dict['loss'] = parse_loss(compile_dict)
+    literal_eval_compile_params = ['metrics', 'loss_weights',
+                                   'weighted_metrics', 'sample_weight_mode']
+    accepted_compile_params = literal_eval_compile_params + ['optimizer', 'loss']
 
-    compile_dict['metrics'] = ast.literal_eval(compile_dict['metrics']) if 'metrics' in compile_dict.keys() else None
-    compile_dict['loss_weights'] = ast.literal_eval(compile_dict['loss_weights']) if 'loss_weights' in compile_dict.keys() else None
-
+    compile_dict = validate_and_literal_eval_keys(compile_dict,
+                                                  literal_eval_compile_params,
+                                                  accepted_compile_params)
+    validate_compile_param_types(compile_dict)
     return (opt_name,opt_args,compile_dict)
 
 # Parse the optimizer name and params.
@@ -178,11 +183,33 @@ def parse_loss(compile_dict):
     return compile_dict['loss']
 
 # Parse the fit parameters into a dictionary.
-def parse_fit_params(str_of_args):
-    compile_dict = convert_string_of_args_to_dict(str_of_args)
-    for key in compile_dict.keys():
-        compile_dict[key] = ast.literal_eval(compile_dict[key])
-    return compile_dict
+def parse_and_validate_fit_params(fit_param_str):
+    fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
+
+    literal_eval_fit_params = ['batch_size','epochs','verbose','shuffle',
+                           'class_weight','initial_epoch','steps_per_epoch']
+    accepted_fit_params = literal_eval_fit_params + ['shuffle']
+
+    fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
+                                                     literal_eval_fit_params,
+                                                     accepted_fit_params)
+    return fit_params_dict
+
+# Validate the keys of the given dictionary and run literal_eval on the
+# user-defined subset
+def validate_and_literal_eval_keys(keys_dict, literal_eval_list, accepted_list):
+
+    for ckey in keys_dict.keys():
+        _assert(ckey in accepted_list,
+            "{0} is not accepted as a parameter yet. "
+            "Please review the user docs".format(ckey))
+        if ckey in literal_eval_list:
+            try:
+                keys_dict[ckey] = ast.literal_eval(keys_dict[ckey])
+            except ValueError:
+                plpy.error(("invalid input value for parameter {0}, "
+                            "please refer to the documentation").format(ckey))
+    return keys_dict
 
 # Split and strip the whitespace of key=value formatted strings
 def split_and_strip(x):
@@ -202,8 +229,32 @@ def get_optimizers():
 # Run the keras.compile with the given parameters
 def compile_model(model, compile_params):
     optimizers = get_optimizers()
-    (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
-    optimizer = optimizers[opt_name](**final_args) if final_args else opt_name
-    model.compile(optimizer=optimizer,
-                  loss=compile_dict['loss'],
-                  metrics=compile_dict['metrics'])
+    (opt_name,final_args,compile_dict) = parse_and_validate_compile_params(compile_params)
+    compile_dict['optimizer'] = optimizers[opt_name](**final_args) if final_args else opt_name
+    model.compile(**compile_dict)
+
+def validate_compile_param_types(compile_dict):
+
+    _assert(compile_dict['metrics'] is None or
+            type(compile_dict['metrics']) is list,
+            "wrong input type for compile parameter metrics: multi-output model"
+            "and user defined metrics are not supported yet, please pass a list")
+
+    _assert('loss_weights' not in compile_dict.keys() or
+            compile_dict['loss_weights'] is None or
+            type(compile_dict['loss_weights']) is list or
+            type(compile_dict['loss_weights']) is dict,
+            "wrong input type for compile parameter loss_weights: only list "
+            "and dictionary are supported for now")
+
+    _assert('weighted_metrics' not in compile_dict.keys() or
+            compile_dict['weighted_metrics'] is None or
+            type(compile_dict['weighted_metrics']) is list,
+            "wrong input type for compile parameter weighted_metrics: only list "
+            "is supported for now")
+
+    _assert('sample_weight_mode' not in compile_dict.keys() or
+            compile_dict['sample_weight_mode'] is None or
+            compile_dict['sample_weight_mode'] == "temporal",
+            """compile parameter sample_weight_mode can only be "temporal" or None""")
+
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 8b68aa9..3d1f8d7 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -231,6 +231,7 @@ select assert(trap_error($TRAP$madlib_keras_predict(
     'cifar10_predict');$TRAP$) = 1,
     'Passing batched image table to predict should error out.');
 
+-- Compile and fit parameter tests
 DROP TABLE IF EXISTS keras_out, keras_out_summary;
 SELECT madlib_keras_fit(
     'cifar_10_sample_batched',
@@ -271,6 +272,22 @@ SELECT madlib_keras_fit(
     1,
     $$ optimizer=Adam(epsilon=None), loss=losses.categorical_crossentropy, metrics=['accuracy']$$::text,
     $$ batch_size=2, epochs=1, verbose=0 $$::text,
+
+    1,
+    FALSE,
+    NULL,
+    'model name', 'model desc');
+
+DROP TABLE IF EXISTS keras_out, keras_out_summary;
+SELECT madlib_keras_fit(
+    'cifar_10_sample_batched',
+    'keras_out',
+    'dependent_var',
+    'independent_var',
+    'model_arch',
+    1,
+    $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), metrics=['accuracy'], loss_weights=[2], sample_weight_mode=None, loss='categorical_crossentropy' $$::text,
+    $$ epochs=10, verbose=0, shuffle=True, initial_epoch=1, steps_per_epoch=2 $$::text,
     1,
     FALSE,
     NULL,
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 2a9a427..0c4072b 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -241,11 +241,19 @@ class MadlibKerasFitTestCase(unittest.TestCase):
     def test_split_and_strip(self):
         self.assertEqual(('a','b'), self.subject.split_and_strip(' a = b '))
 
-    def test_parse_fit_params(self):
+    def test_parse_and_validate_fit_params(self):
         result = {'batch_size':2, 'epochs':1, 'verbose':0}
-        self.assertDictEqual(result, self.subject.parse_fit_params('batch_size=2, epochs=1, verbose=0'))
+        self.assertDictEqual(result, self.subject.parse_and_validate_fit_params('batch_size=2, epochs=1, verbose=0'))
 
     def test_parse_optimizer(self):
+
+        import keras.losses as losses
+
+        loss_func = losses.categorical_crossentropy
+        compile_dict = {'loss':'losses.categorical_crossentropy'}
+        self.assertEqual(self.subject.parse_loss(compile_dict), loss_func)
+
+    def test_parse_loss(self):
         opt_name = 'SGD'
         final_args = {'lr':0.01, 'decay':1e-6, 'nesterov':True}
         compile_dict = {}
@@ -255,13 +263,33 @@ class MadlibKerasFitTestCase(unittest.TestCase):
         self.assertEqual(result_name, opt_name)
         self.assertDictEqual(result_params, final_args)
 
-    def test_parse_compile_params(self):
+    def test_parse_and_validate_compile_params(self):
 
         test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']"
-        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 'metrics':['accuracy'], 'loss':'categorical_crossentropy', 'loss_weights': None}
-        opt_name,opt_args,result_params = self.subject.parse_compile_params(test_str)
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 'metrics':['accuracy'], 'loss':'categorical_crossentropy'}
+        opt_name,opt_args,result_params = self.subject.parse_and_validate_compile_params(test_str)
         self.assertDictEqual(result_params, compile_dict)
 
+    def test_parse_and_validate_fit_params(self):
+
+        test_str = "batch_size=2, epochs=1, verbose=0"
+        fit_dict = {'batch_size':2, 'epochs':1, 'verbose':0}
+        result_params = self.subject.parse_and_validate_fit_params(test_str)
+        self.assertDictEqual(result_params, fit_dict)
+
+    def test_validate_and_literal_eval_keys(self):
+
+        test_dict = {'batch_size':'2', 'epochs':'1', 'verbose':'0'}
+        target_dict = {'batch_size':2, 'epochs':1, 'verbose':0}
+        literal_eval_fit_params = ['batch_size','epochs','verbose','shuffle',
+                           'class_weight','initial_epoch','steps_per_epoch']
+        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        result_params = self.subject.validate_and_literal_eval_keys(
+                            test_dict,
+                            literal_eval_fit_params,
+                            accepted_fit_params)
+        self.assertDictEqual(result_params, target_dict)
+
 class MadlibKerasValidatorTestCase(unittest.TestCase):
     def setUp(self):
         self.plpy_mock = Mock(spec='error')