You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@madlib.apache.org by GitBox <gi...@apache.org> on 2021/02/25 21:59:06 UTC
[GitHub] [madlib] kaknikhil commented on a change in pull request #524: DL: TensorBoard Support

kaknikhil commented on a change in pull request #524:
URL: https://github.com/apache/madlib/pull/524#discussion_r578764993



##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the dictionary
+def parse_callbacks(callbacks, current_seg_id=0):

Review comment:
       Does it make sense to add unit tests for this function ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
##########
@@ -361,7 +361,9 @@ class MstSearch():
             _assert(self.num_configs is None and self.random_state is None,
                     "DL: 'num_configs' and 'random_state' must be NULL for grid search")
             for distribution_type in self.accepted_distributions:
-                _assert(distribution_type not in compile_params_grid and distribution_type not in fit_params_grid,
+                tmp_dist = "'{0}']".format(distribution_type)
+                _assert(tmp_dist not in compile_params_grid and

Review comment:
       It might also make sense to add/update a test for this change. We can add both a positive and a negative test case to make sure that it passes and fails as expected.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
##########
@@ -361,7 +361,9 @@ class MstSearch():
             _assert(self.num_configs is None and self.random_state is None,
                     "DL: 'num_configs' and 'random_state' must be NULL for grid search")
             for distribution_type in self.accepted_distributions:
-                _assert(distribution_type not in compile_params_grid and distribution_type not in fit_params_grid,
+                tmp_dist = "'{0}']".format(distribution_type)

Review comment:
       I think adding a small explanation of why we need to format the type this way will help. Also, renaming the variable to something that reflects its purpose might make it easier to read.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -309,6 +311,8 @@ def parse_and_validate_fit_params(fit_param_str):
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
         if 'callbacks' in fit_params_dict:
+            if not is_superuser(current_user()):

Review comment:
       Can we add a test for this ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -144,7 +147,7 @@ def set_model_weights(segment_model, serialized_weights):
 Used to convert compile_params and fit_params to actual argument dictionaries
 """
 
-def convert_string_of_args_to_dict(str_of_args):
+def convert_string_of_args_to_dict(str_of_args, strip_quotes=True):

Review comment:
       consider adding a docstring for the strip_quotes argument

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)

Review comment:
       All these assertions will raise a generic Python `AssertionError` without any message. Is that intentional?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -170,11 +173,19 @@ def convert_string_of_args_to_dict(str_of_args):
         elif not stack and char == ",":
             value_str = result_str
             result_str = ""
-            compile_dict[key_str.strip()]=value_str.strip().strip('\'')
+            key_str = key_str.strip()
+            value_str = value_str.strip()
+            if strip_quotes:
+                value_str = value_str.strip('"\'')
+            compile_dict[key_str.strip()]=value_str

Review comment:
       do we need to call `.strip` on `key_str` again ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -326,7 +330,8 @@ def parse_and_validate_fit_params(fit_param_str):
             accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 'update_freq',
                                    'write_graph', 'write_grad', 'write_images' ]
             tb_params_dict = validate_and_literal_eval_keys(tb_params_dict, accepted_tb_params, accepted_tb_params)
-            fit_params_dict['callbacks'] = [TensorBoard(tb_params_dict)]
+            tb_params_dict['log_dir'] = tb_params_dict['log_dir']+"{0}".format(current_seg_id)

Review comment:
       ```suggestion
     tb_params_dict['log_dir'] = "{0}{1}".format(tb_params_dict['log_dir'],(current_seg_id))
   ```

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+

Review comment:
       Do we have a dev check test for `shuffle`? If not, should we add one?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',

Review comment:
       Just curious, why did we have to move 'shuffle' from accepted_fit_params to literal_eval_fit_params?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)
+        assert(len(tree.body.elts) == 1)
+        assert(type(tree.body.elts[0]) == ast.Call)
+        assert(tree.body.elts[0].func.id == 'TensorBoard')
+        tb_params = tree.body.elts[0].keywords
+        tb_params_dict = { tb_params[i].arg : tb_params[i].value \
+                        for i in range(len(tb_params)) }
+    except:
+        plpy.error("Invalid callbacks fit param.  Currently, "
+                    "only TensorBoard callbacks are accepted.")
+
+    accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 'update_freq',

Review comment:
       In our dev check tests, we only test for the `log_dir` param. Should we add tests for some of the other params as well? We can probably update the same test to accept these params.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):
 
     if fit_param_str:
-        fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
-
-        literal_eval_fit_params = ['batch_size','epochs','verbose',
+        fit_params_dict = convert_string_of_args_to_dict(fit_param_str, strip_quotes=False)
+        literal_eval_fit_params = ['batch_size','epochs','verbose', 'shuffle',
                                    'class_weight','initial_epoch','steps_per_epoch']
-        accepted_fit_params = literal_eval_fit_params + ['shuffle']
+        accepted_fit_params = literal_eval_fit_params + ['callbacks']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
                                                          literal_eval_fit_params,
                                                          accepted_fit_params)
-        if 'shuffle' in fit_params_dict:
-            shuffle_value = fit_params_dict['shuffle']
-            if shuffle_value == 'True' or shuffle_value == 'False':
-                fit_params_dict['shuffle'] = bool(shuffle_value)
+
+        if 'callbacks' in fit_params_dict:
+            fit_params_dict['callbacks'] = parse_callbacks(fit_params_dict['callbacks'], current_seg_id)
 
         return fit_params_dict
     else:
         return {}
 
+# Parse the callback fit params and create the TensorBoard object in the dictionary
+def parse_callbacks(callbacks, current_seg_id=0):
+    callbacks = callbacks.strip("'")
+    if not is_superuser(current_user()):
+        plpy.error("Only a superuser may use callbacks.")
+    try:
+        tree = ast.parse(callbacks, mode='eval')
+        assert(type(tree.body) == ast.List)
+        assert(len(tree.body.elts) == 1)
+        assert(type(tree.body.elts[0]) == ast.Call)
+        assert(tree.body.elts[0].func.id == 'TensorBoard')
+        tb_params = tree.body.elts[0].keywords
+        tb_params_dict = { tb_params[i].arg : tb_params[i].value \
+                        for i in range(len(tb_params)) }
+    except:
+        plpy.error("Invalid callbacks fit param.  Currently, "
+                    "only TensorBoard callbacks are accepted.")
+
+    accepted_tb_params = [ 'log_dir', 'histogram_freq', 'batch_size', 'update_freq',
+                           'write_graph', 'write_grad', 'write_images' ]
+    tb_params_dict = validate_and_literal_eval_keys(tb_params_dict, accepted_tb_params, accepted_tb_params)
+    tb_params_dict['log_dir'] = tb_params_dict['log_dir']+"{0}".format(current_seg_id)

Review comment:
       ```suggestion
       tb_params_dict['log_dir'] = "{0}{1}".format(tb_params_dict['log_dir'], current_seg_id)
   ```

##########
File path: src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in
##########
@@ -40,7 +40,7 @@ SELECT madlib_keras_fit(
     'model_arch',
     1,
     $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['mae']$$::text,
-    $$ batch_size=2, epochs=1, verbose=0 $$::text,
+    $$ batch_size=2, epochs=1, verbose=0, callbacks=[TensorBoard(log_dir='/tmp/tensorflow/single/')] $$::text,

Review comment:
       can we assert that the directory `/tmp/tensorflow/single/` is not empty or contains expected files ?

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
##########
@@ -287,27 +298,50 @@ def parse_optimizer(compile_dict):
 
 
 # Parse the fit parameters into a dictionary.
-def parse_and_validate_fit_params(fit_param_str):
+def parse_and_validate_fit_params(fit_param_str, current_seg_id=0):

Review comment:
       Do we need to have a default value for `current_seg_id` in this function and in `parse_callbacks`? Wouldn't it be better if the calling function passed the segment id every time?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org