Posted to commits@madlib.apache.org by kh...@apache.org on 2020/09/11 19:23:04 UTC

[madlib] branch master updated: DL: [AutoML] Add support for 'diagonal' Hyperband optimized for MPP

This is an automated email from the ASF dual-hosted git repository.

khannaekta pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new b06bd0e  DL: [AutoML] Add support for 'diagonal' Hyperband optimized for MPP
b06bd0e is described below

commit b06bd0eff2063a36fec5b6b865e208b23ebf44f8
Author: Advitya Gemawat <ag...@vmware.com>
AuthorDate: Wed Aug 5 14:00:33 2020 -0700

    DL: [AutoML] Add support for 'diagonal' Hyperband optimized for MPP
    
    JIRA: MADLIB-1447, MADLIB-1448, MADLIB-1449
    
    We integrate AutoML capabilities into Apache MADlib by introducing a
    function called madlib_keras_automl, which ties together setting up
    and running model selection, and helps automate and accelerate the
    model selection and training process end-to-end. The user
    declaratively specifies the names of their train/val datasets, mst
    and output tables, model architecture and param grid details, the
    chosen method name and associated params, and various training
    details as desired; our API handles the scheduling and execution,
    displaying the algorithm's workload info to the user.
    
    In the case of MPP databases such as Greenplum, we further accelerate
    this algorithm by simultaneously evaluating multiple rounds of the
    algorithm located along a 'diagonal', to keep machines busy and take
    advantage of the large distributed storage and compute power offered by
    Greenplum.
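    
    As a minimal sketch (assuming the default automl_params of
    'R=6, eta=3, skip_last=0'), the (bracket, round) cells that run
    together on each diagonal can be listed with plain Python, mirroring
    the loop bounds used in this patch:
    
        import math
    
        R, eta, skip_last = 6, 3, 0
        s_max = int(math.floor(math.log(R, eta)))  # 1 for R=6, eta=3
        for diag in range((s_max + 1) - skip_last):
            cells = [(s, diag - (s_max - s))
                     for s in range(s_max, s_max - diag - 1, -1)]
            print('diagonal {0}: {1}'.format(diag, cells))
        # diagonal 0: [(1, 0)]
        # diagonal 1: [(1, 1), (0, 0)]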
---
 .../deep_learning/madlib_keras_automl.py_in        | 428 ++++++++++++++++-
 .../deep_learning/madlib_keras_automl.sql_in       | 522 ++++++++++++++++++++-
 .../madlib_keras_model_selection.py_in             |  58 ++-
 .../madlib_keras_model_selection.sql_in            |  67 ++-
 .../deep_learning/madlib_keras_validator.py_in     |   2 +-
 .../deep_learning/madlib_keras_wrapper.py_in       |  24 +-
 .../deep_learning/test/madlib_keras_automl.sql_in  | 327 ++++++++++++-
 .../test/madlib_keras_model_selection.sql_in       |   4 +-
 .../test/unit_tests/test_madlib_keras_automl.py_in |   1 +
 .../test_madlib_keras_model_selection_table.py_in  |   4 +-
 10 files changed, 1365 insertions(+), 72 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
index b093e9e..696f031 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
@@ -17,17 +17,36 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from datetime import datetime
 import plpy
 import math
+from time import time
+
+from madlib_keras_validator import MstLoaderInputValidator
+from utilities.utilities import unique_string, add_postfix, extract_keyvalue_params, \
+    _assert, _assert_equal, rename_table
+from utilities.control import MinWarning, SetGUC
+from madlib_keras_fit_multiple_model import FitMultipleModel
+from madlib_keras_model_selection import MstSearch, ModelSelectionSchema
+from keras_model_arch_table import ModelArchSchema
+from utilities.validate_args import table_exists, drop_tables
 
-from utilities.utilities import _assert
-from utilities.control import MinWarning
 
 class AutoMLSchema:
     BRACKET = 's'
     ROUND = 'i'
     CONFIGURATIONS = 'n_i'
     RESOURCES = 'r_i'
+    HYPERBAND = 'hyperband'
+    R = 'R'
+    ETA = 'eta'
+    SKIP_LAST = 'skip_last'
+    LOSS_METRIC = 'training_loss_final'
+    TEMP_MST_TABLE = unique_string('temp_mst_table')
+    TEMP_MST_SUMMARY_TABLE = add_postfix(TEMP_MST_TABLE, '_summary')
+    TEMP_OUTPUT_TABLE = unique_string('temp_output_table')
+    METRICS_ITERS = 'metrics_iters' # custom column
+
 
 @MinWarning("warning")
 class HyperbandSchedule():
@@ -80,7 +99,7 @@ class HyperbandSchedule():
         in each round of each bracket and skips the number of last rounds specified in 'skip_last'
         """
         for s in reversed(range(self.s_max+1)):
-            n = int(math.ceil(int((self.s_max + 1)/(s+1))*math.pow(self.eta, s))) # initial number of configurations
+            n = int(math.ceil(int((self.s_max+1)/(s+1))*math.pow(self.eta, s))) # initial number of configurations
             r = self.R * math.pow(self.eta, -s)
 
             for i in range((s+1) - int(self.skip_last)):
@@ -138,3 +157,406 @@ class HyperbandSchedule():
                                       r_i_col=AutoMLSchema.RESOURCES,
                                       **locals())
             plpy.execute(insert_query)
+
+@MinWarning("warning")
+class KerasAutoML():
+    """The core AutoML function for running AutoML algorithms such as Hyperband.
+    This function executes the hyperband rounds 'diagonally' to evaluate multiple configurations together
+    and leverage the compute power of MPP databases such as Greenplum.
+    """
+    def __init__(self, schema_madlib, source_table, model_output_table, model_arch_table, model_selection_table,
+                 model_id_list, compile_params_grid, fit_params_grid, automl_method='hyperband',
+                 automl_params='R=6, eta=3, skip_last=0', random_state=None, object_table=None,
+                 use_gpus=False, validation_table=None, metrics_compute_frequency=None,
+                 name=None, description=None, **kwargs):
+        self.schema_madlib = schema_madlib
+        self.source_table = source_table
+        self.model_output_table = model_output_table
+        if self.model_output_table:
+            self.model_info_table = add_postfix(self.model_output_table, '_info')
+            self.model_summary_table = add_postfix(self.model_output_table, '_summary')
+        self.model_arch_table = model_arch_table
+        self.model_selection_table = model_selection_table
+        self.model_selection_summary_table = add_postfix(
+            model_selection_table, "_summary")
+        self.model_id_list = sorted(list(set(model_id_list)))
+        self.compile_params_grid = compile_params_grid
+        self.fit_params_grid = fit_params_grid
+
+        MstLoaderInputValidator(
+            model_arch_table=self.model_arch_table,
+            model_selection_table=self.model_selection_table,
+            model_selection_summary_table=self.model_selection_summary_table,
+            model_id_list=self.model_id_list,
+            compile_params_list=compile_params_grid,
+            fit_params_list=fit_params_grid,
+            object_table=object_table,
+            module_name='madlib_keras_automl'
+        )
+
+        self.automl_method = automl_method if automl_method else 'hyperband'
+        self.automl_params = automl_params if automl_params else 'R=6, eta=3, skip_last=0'
+        self.random_state = random_state
+        self.validate_and_define_inputs()
+
+        self.object_table = object_table
+        self.use_gpus = use_gpus if use_gpus else False
+        self.validation_table = validation_table
+        self.metrics_compute_frequency = metrics_compute_frequency
+        self.name = name
+        self.description = description
+
+        if self.validation_table:
+            AutoMLSchema.LOSS_METRIC = 'validation_loss_final'
+
+        self.create_model_output_table()
+        self.create_model_output_info_table()
+
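+        # 'automl_method' accepts any prefix of 'hyperband', e.g. 'hyp' or 'hyper'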
+        if AutoMLSchema.HYPERBAND.startswith(self.automl_method.lower()):
+            self.find_hyperband_config()
+
+    def create_model_output_table(self):
+        output_table_create_query = """
+                                    CREATE TABLE {self.model_output_table}
+                                    ({ModelSelectionSchema.MST_KEY} INTEGER PRIMARY KEY,
+                                     {ModelArchSchema.MODEL_WEIGHTS} BYTEA,
+                                     {ModelArchSchema.MODEL_ARCH} JSON)
+                                    """.format(self=self, ModelSelectionSchema=ModelSelectionSchema,
+                                               ModelArchSchema=ModelArchSchema)
+        with MinWarning('warning'):
+            plpy.execute(output_table_create_query)
+
+    def create_model_output_info_table(self):
+        info_table_create_query = """
+                                  CREATE TABLE {self.model_info_table}
+                                  ({ModelSelectionSchema.MST_KEY} INTEGER PRIMARY KEY,
+                                   {ModelArchSchema.MODEL_ID} INTEGER,
+                                   {ModelSelectionSchema.COMPILE_PARAMS} TEXT,
+                                   {ModelSelectionSchema.FIT_PARAMS} TEXT,
+                                   model_type TEXT,
+                                   model_size DOUBLE PRECISION,
+                                   metrics_elapsed_time DOUBLE PRECISION[],
+                                   metrics_type TEXT[],
+                                   loss_type TEXT,
+                                   training_metrics_final DOUBLE PRECISION,
+                                   training_loss_final DOUBLE PRECISION,
+                                   training_metrics DOUBLE PRECISION[],
+                                   training_loss DOUBLE PRECISION[],
+                                   validation_metrics_final DOUBLE PRECISION,
+                                   validation_loss_final DOUBLE PRECISION,
+                                   validation_metrics DOUBLE PRECISION[],
+                                   validation_loss DOUBLE PRECISION[],
+                                   {AutoMLSchema.METRICS_ITERS} INTEGER[])
+                                       """.format(self=self, ModelSelectionSchema=ModelSelectionSchema,
+                                                  ModelArchSchema=ModelArchSchema, AutoMLSchema=AutoMLSchema)
+        with MinWarning('warning'):
+            plpy.execute(info_table_create_query)
+
+    def validate_and_define_inputs(self):
+
+        if AutoMLSchema.HYPERBAND.startswith(self.automl_method.lower()):
+            automl_params_dict = extract_keyvalue_params(self.automl_params,
+                                                         default_values={'R': 6, 'eta': 3, 'skip_last': 0},
+                                                         lower_case_names=False)
+            # casting dict values to int
+            for i in automl_params_dict:
+                automl_params_dict[i] = int(automl_params_dict[i])
+            _assert(1 <= len(automl_params_dict) <= 3,
+                    "DL: Only R, eta, and skip_last may be specified")
+            for i in automl_params_dict:
+                if i == AutoMLSchema.R:
+                    self.R = automl_params_dict[AutoMLSchema.R]
+                elif i == AutoMLSchema.ETA:
+                    self.eta = automl_params_dict[AutoMLSchema.ETA]
+                elif i == AutoMLSchema.SKIP_LAST:
+                    self.skip_last = automl_params_dict[AutoMLSchema.SKIP_LAST]
+                else:
+                    plpy.error("DL: {0} is an invalid param".format(i))
+            _assert(self.eta > 1, "DL: eta must be greater than 1")
+            _assert(self.R >= self.eta, "DL: R should not be less than eta")
+            self.s_max = int(math.floor(math.log(self.R, self.eta)))
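+            # e.g. with the defaults R=6, eta=3: s_max = floor(log_3(6)) = 1, giving brackets s=1 and s=0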
+            _assert(self.skip_last >= 0 and self.skip_last < self.s_max+1, "DL: skip_last must be " +
+                    "non-negative and less than {0}".format(self.s_max+1))
+        else:
+            plpy.error("DL: Only hyperband is currently supported as the automl method")
+
+    def _is_valid_metrics_compute_frequency(self, num_iterations):
+        """
+        Utility function (same as in the Fit Multiple function) to check the validity of the mcf value for
+        computing metrics during an AutoML algorithm run.
+        :param num_iterations: iterations/resources to allocate for training.
+        :return: boolean indicating the validity of the mcf value.
+        """
+        return self.metrics_compute_frequency is None or \
+               (self.metrics_compute_frequency >= 1 and \
+                self.metrics_compute_frequency <= num_iterations)
+
+    def find_hyperband_config(self):
+        """
+        Runs the diagonal hyperband algorithm.
+        """
+        initial_vals = {}
+
+        # get hyper parameter configs for each s
+        for s in reversed(range(self.s_max+1)):
+            n = int(math.ceil(int((self.s_max+1)/(s+1))*math.pow(self.eta, s))) # initial number of configurations
+            r = self.R * math.pow(self.eta, -s) # initial number of iterations to run configurations for
+            initial_vals[s] = (n, int(round(r)))
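+        # e.g. with the defaults R=6, eta=3: initial_vals = {1: (3, 2), 0: (2, 6)}, i.e. bracket 1
+        # starts with 3 configs at 2 iterations each, and bracket 0 with 2 configs at 6 iterations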
+        self.start_training_time = self.get_current_timestamp()
+        random_search = MstSearch(self.model_arch_table, self.model_selection_table, self.model_id_list,
+                                  self.compile_params_grid, self.fit_params_grid, 'random',
+                                  sum([initial_vals[k][0] for k in initial_vals][self.skip_last:]), self.random_state,
+                                  self.object_table)
+        random_search.load() # for populating mst tables
+
+        # for creating the summary table for usage in fit multiple
+        plpy.execute("CREATE TABLE {AutoMLSchema.TEMP_MST_SUMMARY_TABLE} AS " \
+                     "SELECT * FROM {random_search.model_selection_summary_table}".format(AutoMLSchema=AutoMLSchema,
+                                                                                          random_search=random_search))
+        ranges_dict = self.mst_key_ranges_dict(initial_vals)
+        # to store the bracket and round numbers
+        s_dict, i_dict = {}, {}
+        for key, val in ranges_dict.items():
+            for mst_key in range(val[0], val[1]+1):
+                s_dict[mst_key] = key
+                i_dict[mst_key] = -1
+
+        # outer loop on diagonal
+        for i in range((self.s_max+1) - int(self.skip_last)):
+            # inner loop on s desc
+            temp_lst = []
+            configs_prune_lookup = {}
+            for s in range(self.s_max, self.s_max-i-1, -1):
+                n = initial_vals[s][0]
+                n_i = n * math.pow(self.eta, -i+self.s_max-s)
+                configs_prune_lookup[s] = int(round(n_i))
+                temp_lst.append("{0} configs under bracket={1} & round={2}".format(int(round(n_i)), s, s-self.s_max+i))
+            num_iterations = int(initial_vals[self.s_max-i][1])
+            plpy.info('*** Diagonally evaluating ' + ', '.join(temp_lst) + ' with {0} iterations ***'.format(
+                num_iterations))
+
+            self.reconstruct_temp_mst_table(i, ranges_dict, configs_prune_lookup) # has keys to evaluate
+            active_keys = plpy.execute("SELECT mst_key FROM {AutoMLSchema.TEMP_MST_TABLE}".format(AutoMLSchema=
+                                                                                                  AutoMLSchema))
+            for k in active_keys:
+                i_dict[k['mst_key']] += 1
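+            # warm start on every diagonal after the first, so surviving configs resume from their stored weights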
+            self.warm_start = int(i != 0)
+            mcf = self.metrics_compute_frequency if self._is_valid_metrics_compute_frequency(num_iterations) else None
+            model_training = FitMultipleModel(self.schema_madlib, self.source_table, AutoMLSchema.TEMP_OUTPUT_TABLE,
+                                              AutoMLSchema.TEMP_MST_TABLE, num_iterations, self.use_gpus,
+                                              self.validation_table, mcf, self.warm_start, self.name, self.description)
+            self.update_model_output_table(model_training)
+            self.update_model_output_info_table(i, model_training, initial_vals)
+        self.end_training_time = self.get_current_timestamp()
+        self.add_additional_info_cols(s_dict, i_dict)
+        self.update_model_selection_table()
+        self.generate_model_output_summary_table(model_training)
+        self.remove_temp_tables(model_training)
+
+    def get_current_timestamp(self):
+        """for start and end times for the chosen AutoML algorithm. Showcased in the output summary table"""
+        return datetime.fromtimestamp(time()).strftime('%Y-%m-%d %H:%M:%S')
+
+    def mst_key_ranges_dict(self, initial_vals):
+        """
+        Extracts the ranges of model configs (using mst_keys) belonging to / sampled as part of
+        executing a particular SHA bracket.
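+        e.g. with the defaults R=6, eta=3 (initial_vals={1: (3, 2), 0: (2, 6)}), this returns
+        {1: (1, 3), 0: (4, 5)}: mst_keys 1-3 belong to bracket 1, mst_keys 4-5 to bracket 0.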
+        """
+        d = {}
+        for s_val in sorted(initial_vals.keys(), reverse=True): # going from s_max to 0
+            if s_val == self.s_max:
+                d[s_val] = (1, initial_vals[s_val][0])
+            else:
+                d[s_val] = (d[s_val+1][1]+1, d[s_val+1][1]+initial_vals[s_val][0])
+        return d
+
+    def reconstruct_temp_mst_table(self, i, ranges_dict, configs_prune_lookup):
+        """
+        Drops and reconstructs a temp mst table for evaluation along particular diagonals of hyperband.
+        :param i: outer diagonal loop iteration.
+        :param ranges_dict: model config ranges to group by bracket number.
+        :param configs_prune_lookup: Lookup dictionary for configs to evaluate for a diagonal.
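+        e.g. with the defaults, on diagonal i=1: {1: 1, 0: 2}, i.e. keep the best 1 config from
+        bracket 1 and start the 2 fresh configs of bracket 0.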
+        :return:
+        """
+        if i == 0:
+            _assert_equal(len(configs_prune_lookup), 1, "invalid args")
+            lower_bound, upper_bound = ranges_dict[self.s_max]
+            plpy.execute("CREATE TABLE {AutoMLSchema.TEMP_MST_TABLE} AS SELECT * FROM {self.model_selection_table} "
+                         "WHERE mst_key >= {lower_bound} AND mst_key <= {upper_bound}".format(self=self,
+                                                                                              AutoMLSchema=AutoMLSchema,
+                                                                                              lower_bound=lower_bound,
+                                                                                              upper_bound=upper_bound,))
+            return
+        # dropping and repopulating temp_mst_table
+        drop_tables([AutoMLSchema.TEMP_MST_TABLE])
+
+        # {mst_key} changed from SERIAL to INTEGER for safe insertions and preservation of mst_key values
+        create_query = """
+                        CREATE TABLE {AutoMLSchema.TEMP_MST_TABLE} (
+                            {mst_key} INTEGER,
+                            {model_id} INTEGER,
+                            {compile_params} VARCHAR,
+                            {fit_params} VARCHAR,
+                            unique ({model_id}, {compile_params}, {fit_params})
+                        );
+                       """.format(AutoMLSchema=AutoMLSchema,
+                                  mst_key=ModelSelectionSchema.MST_KEY,
+                                  model_id=ModelSelectionSchema.MODEL_ID,
+                                  compile_params=ModelSelectionSchema.COMPILE_PARAMS,
+                                  fit_params=ModelSelectionSchema.FIT_PARAMS)
+        with MinWarning('warning'):
+            plpy.execute(create_query)
+
+        query = ""
+        new_configs = True
+        for s_val in configs_prune_lookup:
+            lower_bound, upper_bound = ranges_dict[s_val]
+            if new_configs:
+                query += "INSERT INTO {AutoMLSchema.TEMP_MST_TABLE} SELECT mst_key, model_id, compile_params, fit_params " \
+                         "FROM {self.model_selection_table} WHERE mst_key >= {lower_bound} " \
+                         "AND mst_key <= {upper_bound};".format(self=self, AutoMLSchema=AutoMLSchema,
+                                                                lower_bound=lower_bound, upper_bound=upper_bound)
+                new_configs = False
+            else:
+                query += "INSERT INTO {AutoMLSchema.TEMP_MST_TABLE} SELECT mst_key, model_id, compile_params, fit_params " \
+                         "FROM {self.model_info_table} WHERE mst_key >= {lower_bound} " \
+                         "AND mst_key <= {upper_bound} ORDER BY {AutoMLSchema.LOSS_METRIC} " \
+                         "LIMIT {configs_prune_lookup_val};".format(self=self, AutoMLSchema=AutoMLSchema,
+                                                                    lower_bound=lower_bound, upper_bound=upper_bound,
+                                                                    configs_prune_lookup_val=configs_prune_lookup[s_val])
+        plpy.execute(query)
+
+    def update_model_output_table(self, model_training):
+        """
+        Updates the overall model output table with the results of a hyperband diagonal run.
+        :param model_training: Fit Multiple function call object.
+        """
+        # updates model weights for any previously trained configs
+        plpy.execute("UPDATE {self.model_output_table} a SET model_weights=" \
+                     "t.model_weights FROM {model_training.original_model_output_table} t " \
+                     "WHERE a.mst_key=t.mst_key".format(self=self, model_training=model_training))
+
+        # truncates and re-creates the table to avoid memory blow-ups
+        with SetGUC("dev_opt_unsafe_truncate_in_subtransaction", "on"):
+            temp_model_table = unique_string('updated_model')
+            plpy.execute("CREATE TABLE {temp_model_table} AS SELECT * FROM {self.model_output_table};" \
+                         "TRUNCATE {self.model_output_table}; " \
+                         "DROP TABLE {self.model_output_table};".format(temp_model_table=temp_model_table, self=self))
+            rename_table(self.schema_madlib, temp_model_table, self.model_output_table)
+
+        # inserts any newly trained configs
+        plpy.execute("INSERT INTO {self.model_output_table} SELECT * FROM {model_training.original_model_output_table} " \
+                     "WHERE {model_training.original_model_output_table}.mst_key NOT IN " \
+                     "(SELECT mst_key FROM {self.model_output_table})".format(self=self,
+                                                                              model_training=model_training))
+
+    def update_model_output_info_table(self, i, model_training, initial_vals):
+        """
+        Updates the overall model output info table with the results of a hyperband diagonal run.
+        :param i: outer diagonal loop iteration.
+        :param model_training: Fit Multiple function call object.
+        :param initial_vals: Dictionary of initial configurations and resources as part of the initial hyperband
+        schedule.
+        """
+        # normalizing factor for metrics_iters due to warm start
+        epochs_factor = sum([n[1] for n in initial_vals.values()][::-1][:i]) # iterations run on previous diagonals
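+        # e.g. with the defaults, on diagonal i=1 this adds the 2 iterations already run on diagonal 0,
+        # so that metrics_iters keeps a single global iteration count across warm starts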
+        iters = plpy.execute("SELECT {AutoMLSchema.METRICS_ITERS} " \
+                             "FROM {model_training.model_summary_table}".format(AutoMLSchema=AutoMLSchema,
+                                                                                model_training=model_training))
+        metrics_iters_val = [epochs_factor+mi for mi in iters[0]['metrics_iters']] # global iteration counter
+
+        validation_update_q = "validation_metrics_final=t.validation_metrics_final, " \
+                                     "validation_loss_final=t.validation_loss_final, " \
+                                     "validation_metrics=a.validation_metrics || t.validation_metrics, " \
+                                     "validation_loss=a.validation_loss || t.validation_loss, " \
+            if self.validation_table else ""
+
+        # updates train/val info for any previously trained configs
+        plpy.execute("UPDATE {self.model_info_table} a SET " \
+                     "metrics_elapsed_time=a.metrics_elapsed_time || t.metrics_elapsed_time, " \
+                     "training_metrics_final=t.training_metrics_final, " \
+                     "training_loss_final=t.training_loss_final, " \
+                     "training_metrics=a.training_metrics || t.training_metrics, " \
+                     "training_loss=a.training_loss || t.training_loss, ".format(self=self) + validation_update_q +
+                     "{AutoMLSchema.METRICS_ITERS}=a.metrics_iters || ARRAY{metrics_iters_val}::INTEGER[] " \
+                     "FROM {model_training.model_info_table} t " \
+                     "WHERE a.mst_key=t.mst_key".format(model_training=model_training, AutoMLSchema=AutoMLSchema,
+                                                        metrics_iters_val=metrics_iters_val))
+
+        # inserts info about metrics and validation for newly trained model configs
+        plpy.execute("INSERT INTO {self.model_info_table} SELECT t.*, ARRAY{metrics_iters_val}::INTEGER[] AS metrics_iters " \
+                     "FROM {model_training.model_info_table} t WHERE t.mst_key NOT IN " \
+                     "(SELECT mst_key FROM {self.model_info_table})".format(self=self,
+                                                                            model_training=model_training,
+                                                                            metrics_iters_val=metrics_iters_val))
+
+    def add_additional_info_cols(self, s_dict, i_dict):
+        """Adds s and i columns to the info table"""
+
+        plpy.execute("ALTER TABLE {self.model_info_table} ADD COLUMN s int, ADD COLUMN i int;".format(self=self))
+
+        rows = [(k, s_dict[k], i_dict[k]) for k in s_dict]
+        query = "UPDATE {self.model_info_table} t SET s=b.s_val, i=b.i_val FROM unnest(ARRAY{rows}) " \
+                "b (key integer, s_val integer, i_val integer) WHERE t.mst_key=b.key".format(self=self, rows=rows)
+        plpy.execute(query)
+
+    def update_model_selection_table(self):
+        """
+        Drops and re-creates the mst table to only include the best performing model configuration.
+        """
+        drop_tables([self.model_selection_table])
+
+        # only retaining best performing config
+        plpy.execute("CREATE TABLE {self.model_selection_table} AS SELECT mst_key, model_id, compile_params, " \
+                     "fit_params FROM {self.model_info_table} " \
+                     "ORDER BY {AutoMLSchema.LOSS_METRIC} LIMIT 1".format(self=self, AutoMLSchema=AutoMLSchema))
+
+    def generate_model_output_summary_table(self, model_training):
+        """
+        Creates the output summary table and populates it with static values related to the AutoML workload.
+        :param model_training: Fit Multiple function call object.
+        """
+        create_query = plpy.prepare("""
+                CREATE TABLE {self.model_summary_table} AS
+                SELECT
+                    $MAD${self.source_table}$MAD$::TEXT AS source_table,
+                    $MAD${self.validation_table}$MAD$::TEXT AS validation_table,
+                    $MAD${self.model_output_table}$MAD$::TEXT AS model,
+                    $MAD${self.model_info_table}$MAD$::TEXT AS model_info,
+                    (SELECT dependent_varname FROM {model_training.model_summary_table})
+                    AS dependent_varname,
+                    (SELECT independent_varname FROM {model_training.model_summary_table})
+                    AS independent_varname,
+                    $MAD${self.model_arch_table}$MAD$::TEXT AS model_arch_table,
+                    $MAD${self.model_selection_table}$MAD$::TEXT AS model_selection_table,
+                    $MAD${self.automl_method}$MAD$::TEXT AS automl_method,
+                    $MAD${self.automl_params}$MAD$::TEXT AS automl_params,
+                    $MAD${self.random_state}$MAD$::TEXT AS random_state,
+                    $MAD${self.object_table}$MAD$::TEXT AS object_table,
+                    {self.use_gpus} AS use_gpus,
+                    (SELECT metrics_compute_frequency FROM {model_training.model_summary_table})::INTEGER 
+                    AS metrics_compute_frequency,
+                    $MAD${self.name}$MAD$::TEXT AS name,
+                    $MAD${self.description}$MAD$::TEXT AS description,
+                    '{self.start_training_time}'::TIMESTAMP AS start_training_time,
+                    '{self.end_training_time}'::TIMESTAMP AS end_training_time,
+                    (SELECT madlib_version FROM {model_training.model_summary_table}) AS madlib_version,
+                    (SELECT num_classes FROM {model_training.model_summary_table})::INTEGER AS num_classes,
+                    (SELECT class_values FROM {model_training.model_summary_table}) AS class_values,
+                    (SELECT dependent_vartype FROM {model_training.model_summary_table}) 
+                    AS dependent_vartype,
+                    (SELECT normalizing_const FROM {model_training.model_summary_table}) 
+                    AS normalizing_const
+            """.format(self=self, model_training=model_training))
+
+        with MinWarning('warning'):
+            plpy.execute(create_query)
+
+    def remove_temp_tables(self, model_training):
+        """
+        Removes all intermediate tables created for AutoML runs/updates.
+        :param model_training: Fit Multiple function call object.
+        """
+        drop_tables([model_training.original_model_output_table, model_training.model_info_table,
+                     model_training.model_summary_table, AutoMLSchema.TEMP_MST_TABLE,
+                     AutoMLSchema.TEMP_MST_SUMMARY_TABLE])
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.sql_in
index e27cfda..06889d2 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.sql_in
@@ -27,28 +27,511 @@
  *//* ----------------------------------------------------------------------- */
 
 m4_include(`SQLCommon.m4')
+
+
 /**
 @addtogroup grp_automl
 
-@brief Utility function to set up a model selection table for model architecture search
-and hyperparameter tuning.
+
+@brief Functions to run automated machine learning (AutoML) algorithms to automate
+and speed-up the model selection and training processes for model architecture search,
+hyperparameter tuning, and model evaluation.
 
 \warning <em> This MADlib method is still in early stage development.
 Interface and implementation are subject to change. </em>
 
 <div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#madlib_keras_automl">AutoML Function</a></li>
 <li class="level1"><a href="#hyperband_schedule">Hyperband Schedule</a></li>
 <li class="level1"><a href="#example">Examples</a></li>
 <li class="level1"><a href="#notes">Notes</a></li>
 <li class="level1"><a href="#related">Related Topics</a></li>
 </ul></div>
 
-This module sets up the Hyperband schedule of evaluating configurations
-for use by the Keras AutoML of MADlib.
+This module sets up the AutoML algorithms to automate and accelerate
+the model selection and training processes, involving hyperparameter optimization,
+model architecture search, and model training.
+
+The module also has a utility function for viewing the Hyperband schedule of
+evaluating configurations, as used by the Keras AutoML module of MADlib.
 By configuration we mean both hyperparameter tuning and
-model architecture search.  The table defines the unique combinations
-of model architectures, compile and fit parameters
-to run on a massively parallel processing database cluster.
+model architecture search.
+
+@anchor madlib_keras_automl
+@par AutoML
+
+<pre class="syntax">
+madlib_keras_automl(
+    source_table,
+    model_output_table,
+    model_arch_table,
+    model_selection_table,
+    model_id_list,
+    compile_params_grid,
+    fit_params_grid,
+    automl_method,
+    automl_params,
+    random_state,
+    object_table,
+    use_gpus,
+    validation_table,
+    metrics_compute_frequency,
+    name,
+    description
+    )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+  <dt>source_table</dt>
+  <dd>TEXT. Name of the table containing the training data.
+  This is the name of the output table from the image preprocessor. Independent
+  and dependent variables are specified in the preprocessor
+  step, which is why you do not need to explicitly state
+  them here as part of the fit function. Configurations are evaluated on the basis of the training loss,
+  unless a validation table is specified below.
+  </dd>
+
+  <dt>model_output_table</dt>
+  <dd>TEXT. Name of the output table containing the
+  multiple models created.
+  @note pg_temp is not allowed as an output table schema for fit multiple.
+  Details of output tables are shown below.
+  </dd>
+
+  <dt>model_arch_table</dt>
+  <dd>VARCHAR. Table containing model architectures and weights.
+  For more information on this table
+  refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+  </dd>
+
+  <dt>model_selection_table</dt>
+  <dd>VARCHAR. Model selection table created by this utility.  A summary table
+  named <model_selection_table>_summary is also created.  Contents of both output
+  tables are described below.
+  </dd>
+
+  <dt>model_id_list</dt>
+  <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
+  in the run combinations.  For hyperparameter search, this will typically be
+  one model ID.  For model architecture search, this will be the different model IDs
+  that you want to test.
+  </dd>
+
+  <dt>compile_params_grid</dt>
+  <dd>VARCHAR. String representation of a Python dictionary
+  of compile parameters to be tested. Each entry
+  of the dictionary should consist of keys as compile parameter names,
+  and values as a Python list of compile parameter values to be passed to Keras.
+  Also, optimizer parameters are a nested dictionary to allow different
+  optimizer types to have different parameters or ranges of parameters.
+  Here is an example:
+
+  <pre class="example">
+  $$
+    {'loss': ['categorical_crossentropy'],
+     'optimizer_params_list': [
+        {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+        {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
+     'metrics': ['accuracy']
+    }
+  $$
+  </pre>
+
+  The following types of sampling are supported:  'linear', 'log' and 'log_near_one'.
+  The 'log_near_one' sampling is useful for exponentially weighted average types of parameters like momentum,
+  which are very sensitive to changes near 1.  It has the effect of producing more values near 1
+  than regular log-based sampling.  A short sketch of how each sampling type draws
+  values appears after this parameter list.
+
+  In the case of grid search, omit the sample type and just put the grid points in the list.
+  For custom loss functions or custom metrics,
+  list the custom function name in the usual way, and provide the name of the
+  table where the serialized Python objects reside using the
+  parameter 'object_table' below. See the examples section later on this page for more examples.
+  </dd>
+
+  <dt>fit_params_grid</dt>
+  <dd>VARCHAR.  String representation of a Python dictionary
+  of fit parameters to be tested. Each entry
+  of the dictionary should consist of keys as fit parameter names,
+  and values as a Python list of fit parameter values
+  to be passed to Keras. Here is an example:
+
+  <pre class="example">
+  $$
+    {'batch_size': [32, 64, 128, 256],
+     'epochs': [10, 20, 30]
+    }
+  $$
+  </pre>
+  See the examples section later on this page for more examples.
+  </dd>
+
+  <dt>automl_method (optional)</dt>
+  <dd>VARCHAR, default 'hyperband'. Name of the AutoML algorithm to run.
+  Currently only 'hyperband' is supported. Note that you can also use a short prefix
+  of the 'hyperband' keyword, e.g., 'hyper' or 'hyp', instead
+  of writing out 'hyperband' in full.
+  </dd>
+
+  <dt>automl_params (optional)</dt>
+  <dd>VARCHAR, default 'R=6, eta=3, skip_last=0'. Parameters for the chosen AutoML
+  method in a comma-separated string of key-value pairs. The Hyperband params are:
+  R - the maximum amount of resources/iterations allocated to a single configuration
+  in a round of hyperband; eta - the factor controlling the proportion of configurations discarded in each
+  round of successive halving; skip_last - the number of last diagonal brackets to skip running
+  in the algorithm.
+  We encourage setting a low R value (i.e. 2 to 10), or a high R value combined with a high skip_last value,
+  to evaluate a variety of configurations with a decent number of iterations. See the description below for
+  details, and the schedule sketch after this parameter list.
+  </dd>
+
+  <dt>random_state (optional)</dt>
+  <dd>INTEGER, default: NULL.  Pseudo random number generator
+  state used for random uniform sampling from lists of possible
+  values. Pass an integer to evaluate a fixed set of configurations.
+
+  @note
+    Specifying a random state does not guarantee reproducibility of the best configuration or of the best
+    train/validation accuracy/loss. It only guarantees that the same set of configurations will be chosen for evaluation.
+
+  </dd>
+
+  <dt>object_table (optional)</dt>
+  <dd>VARCHAR, default: NULL. Name of the table containing
+  Python objects in the case that custom loss functions or
+  custom metrics are specified in the 'compile_params_grid'.
+  </dd>
+
+  <dt>validation_table (optional)</dt>
+  <dd>TEXT, default: NULL. Name of the table containing
+  the validation dataset.
+  Note that the validation dataset must be preprocessed
+  in the same way as the training dataset, so this
+  is the name of the output
+  table from running the image preprocessor on the validation dataset.
+  If specified, configurations are evaluated on the basis of validation
+  loss instead of training loss. Using a validation dataset can mean a
+  longer training time depending on its size; this cost can be controlled
+  using the 'metrics_compute_frequency'
+  parameter described below.</dd>
+
+  <DT>metrics_compute_frequency (optional)</DT>
+  <DD>INTEGER, default: once at the end of training.
+  Frequency to compute per-iteration
+  metrics for the training dataset and validation dataset
+  (if specified).  There can be considerable cost to
+  computing metrics every iteration, especially if the
+  training dataset is large.  This parameter is a way of
+  controlling the frequency of those computations.
+  For example, if you specify 5, then metrics will be computed
+  every 5 iterations as well as at the end of training.
+  If you use the default, metrics will be computed only
+  once after training has completed. (The number of iterations
+  run in each Hyperband round is decided by the AutoML algorithm.)
+  </DD>
+
+  <DT>name (optional)</DT>
+  <DD>TEXT, default: NULL.
+    Free text string to identify a name, if desired.
+  </DD>
+
+  <DT>description (optional)</DT>
+  <DD>TEXT, default: NULL.
+    Free text string to provide a description, if desired.
+  </DD>
+
+</dl>
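+
+As an illustrative sketch (plain Python, mirroring the arithmetic that the
+module uses internally), the Hyperband schedule implied by the default
+'R=6, eta=3, skip_last=0' can be computed as follows:
+
+<pre class="example">
+import math
+
+R, eta, skip_last = 6, 3, 0
+s_max = int(math.floor(math.log(R, eta)))  # highest bracket number
+for s in reversed(range(s_max + 1)):
+    n = int(math.ceil(int((s_max + 1) / (s + 1)) * math.pow(eta, s)))  # initial configs in bracket s
+    r = R * math.pow(eta, -s)                                          # initial iterations per config
+    for i in range((s + 1) - skip_last):
+        n_i = int(round(n * math.pow(eta, -i)))  # configs kept in round i
+        r_i = int(round(r * math.pow(eta, i)))   # iterations per config in round i
+        print('bracket s={0}, round i={1}: n_i={2}, r_i={3}'.format(s, i, n_i, r_i))
+</pre>
+
+Similarly, a minimal sketch of how the three sampling types for
+'compile_params_grid' draw values (mirroring the module's internal
+numpy-based sampling):
+
+<pre class="example">
+import numpy as np
+
+# 'linear': uniform between the bounds
+lr = np.random.uniform(0.0001, 0.001)
+# 'log': uniform in log10 space, spreading samples across orders of magnitude
+lr = np.power(10, np.random.uniform(np.log10(0.0001), np.log10(0.001)))
+# 'log_near_one': log-uniform in (1 - value), denser near 1 (e.g. momentum)
+momentum = 1.0 - np.power(10, np.random.uniform(np.log10(1.0 - 0.99),
+                                                np.log10(1.0 - 0.95)))
+</pre>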
+
+<b>Output tables</b>
+<br>
+
+    The model selection output table contains exactly one row with the best model configuration
+    (ranked by training/validation loss) and has the following columns:
+    <table class="output">
+      <tr>
+        <th>mst_key</th>
+        <td>INTEGER. ID that defines a unique tuple for
+        model architecture-compile parameters-fit parameters.
+        </td>
+      </tr>
+      <tr>
+        <th>model_id</th>
+        <td>INTEGER. Model architecture ID from the 'model_arch_table'.
+        </td>
+      </tr>
+      <tr>
+        <th>compile_params</th>
+        <td>VARCHAR. Keras compile parameters.
+        </td>
+      </tr>
+      <tr>
+        <th>fit_params</th>
+        <td>VARCHAR. Keras fit parameters.
+        </td>
+      </tr>
+    </table>
+    A summary table named <model_selection_table>_summary is
+    also created, which contains the following columns:
+    <table class="output">
+      <tr>
+        <th>model_arch_table</th>
+        <td>VARCHAR. Name of the model architecture table containing the
+        model architecture IDs.
+        </td>
+      </tr>
+      <tr>
+        <th>object_table</th>
+        <td>VARCHAR. Name of the object table containing the serialized
+        Python objects for custom loss functions and custom metrics.
+        If there are none, this field will be blank.
+        </td>
+      </tr>
+    </table>
+
+    The model output table produced by fit contains the following columns,
+    with one row per model configuration generated:
+    <table class="output">
+      <tr>
+        <th>mst_key</th>
+        <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters,
+        as defined in the 'model_selection_table'.</td>
+      </tr>
+      <tr>
+        <th>model_weights</th>
+        <td>BYTEA8. Byte array containing the weights of the neural net.</td>
+      </tr>
+      <tr>
+        <th>model_arch</th>
+        <td>TEXT. A JSON representation of the model architecture
+        used in training.</td>
+      </tr>
+    </table>
+
+    An info table named \<model_output_table\>_info is also created, which has the following columns.
+    There is one row per model as per the rows in the 'model_selection_table':
+    <table class="output">
+      <tr>
+        <th>mst_key</th>
+        <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters,
+        as defined in the 'model_selection_table'.</td>
+      </tr>
+      <tr>
+        <th>model_id</th>
+        <td>INTEGER. ID that defines the model in the 'model_arch_table'.</td>
+      </tr>
+      <tr>
+        <th>compile_params</th>
+        <td>Compile parameters passed to Keras.</td>
+    </tr>
+    <tr>
+        <th>fit_params</th>
+        <td>Fit parameters passed to Keras.</td>
+    </tr>
+    <tr>
+        <th>model_type</th>
+        <td>General identifier for type of model trained.
+        Currently says 'madlib_keras'.</td>
+    </tr>
+    <tr>
+        <th>model_size</th>
+        <td>Size of the model in KB.  Models are stored in
+        'bytea' data format which is used for binary strings
+        in PostgreSQL type databases.</td>
+    </tr>
+    <tr>
+        <th>metrics_elapsed_time</th>
+        <td> Array of elapsed time for metric computations as
+        per the 'metrics_compute_frequency' parameter.
+        Useful for drawing a curve showing loss, accuracy or
+        other metrics as a function of time.
+        For example, if 'metrics_compute_frequency=5'
+        this would be an array of elapsed time for every 5th
+        iteration, plus the last iteration.</td>
+    </tr>
+    <tr>
+        <th>metrics_type</th>
+        <td>Metric specified in the 'compile_params'.</td>
+    </tr>
+    <tr>
+        <th>training_metrics_final</th>
+        <td>Final value of the training
+        metric after all iterations have completed.
+        The metric reported is the one
+        specified in the 'metrics_type' parameter.</td>
+    </tr>
+    <tr>
+        <th>training_loss_final</th>
+        <td>Final value of the training loss after all
+        iterations have completed.</td>
+    </tr>
+    <tr>
+        <th>training_metrics</th>
+        <td>Array of training metrics as
+        per the 'metrics_compute_frequency' parameter.
+        For example, if 'metrics_compute_frequency=5'
+        this would be an array of metrics for every 5th
+        iteration, plus the last iteration.</td>
+    </tr>
+    <tr>
+        <th>training_loss</th>
+        <td>Array of training losses as
+        per the 'metrics_compute_frequency' parameter.
+        For example, if 'metrics_compute_frequency=5'
+        this would be an array of losses for every 5th
+        iteration, plus the last iteration.</td>
+    </tr>
+    <tr>
+        <th>validation_metrics_final</th>
+        <td>Final value of the validation
+        metric after all iterations have completed.
+        The metric reported is the one
+        specified in the 'metrics_type' parameter.</td>
+    </tr>
+    <tr>
+        <th>validation_loss_final</th>
+        <td>Final value of the validation loss after all
+        iterations have completed.</td>
+    </tr>
+    <tr>
+        <th>validation_metrics</th>
+        <td>Array of validation metrics as
+        per the 'metrics_compute_frequency' parameter.
+        For example, if 'metrics_compute_frequency=5'
+        this would be an array of metrics for every 5th
+        iteration, plus the last iteration.</td>
+    </tr>
+    <tr>
+        <th>validation_loss</th>
+        <td>Array of validation losses as
+        per the 'metrics_compute_frequency' parameter.
+        For example, if 'metrics_compute_frequency=5'
+        this would be an array of losses for every 5th
+        iteration, plus the last iteration.</td>
+    </tr>
+    <tr>
+        <th>metrics_iters</th>
+        <td>Array indicating the iterations for which
+        metrics are calculated, as derived from the
+        'metrics_compute_frequency' parameter and the number of iterations decided by the AutoML algorithm.
+        For example, if 'num_iterations=5'
+        and 'metrics_compute_frequency=2', then 'metrics_iters' value
+        would be {2,4,5} indicating that metrics were computed
+        at iterations 2, 4 and 5 (at the end).
+        If 'num_iterations=5'
+        and 'metrics_compute_frequency=1', then 'metrics_iters' value
+        would be {1,2,3,4,5} indicating that metrics were computed
+        at every iteration.</td>
+    </tr>
+    <tr>
+        <th>s</th>
+        <td>Hyperband bracket number.</td>
+    </tr>
+    <tr>
+        <th>i</th>
+        <td>Latest Hyperband round number evaluated.</td>
+    </tr>
+
+    </table>
+
+    A summary table named \<model_output_table\>_summary is also created, which has the following columns:
+    <table class="output">
+    <tr>
+        <th>source_table</th>
+        <td>Source table used for training.</td>
+    </tr>
+    <tr>
+        <th>validation_table</th>
+        <td>Name of the table containing
+        the validation dataset (if specified).</td>
+    </tr>
+    <tr>
+        <th>model</th>
+        <td>Name of the output table containing
+        the model for each model selection tuple.</td>
+    </tr>
+    <tr>
+        <th>model_info</th>
+        <td>Name of the output table containing
+        the model performance and other info for
+        each model selection tuple.</td>
+    </tr>
+    <tr>
+        <th>dependent_varname</th>
+        <td>Dependent variable column from the original
+        source table in the image preprocessing step.</td>
+    </tr>
+    <tr>
+        <th>independent_varname</th>
+        <td>Independent variables column from the original
+        source table in the image preprocessing step.</td>
+    </tr>
+    <tr>
+        <th>model_arch_table</th>
+        <td>Name of the table containing
+        the model architecture and (optionally) the
+        initial model weights.</td>
+    </tr>
+    <tr>
+        <th>model_selection_table</th>
+        <td>Name of the model selection table containing
+        the best configuration.</td>
+    </tr>
+    <tr>
+        <th>automl_method</th>
+        <td>Name of the AutoML method used.</td>
+    </tr>
+    <tr>
+        <th>automl_params</th>
+        <td>AutoML param values used.</td>
+    </tr>
+    <tr>
+        <th>random_state</th>
+        <td>Chosen random seed.</td>
+    </tr>
+    <tr>
+        <th>metrics_compute_frequency</th>
+        <td>Frequency that per-iteration metrics are computed
+        for the training dataset and validation
+        dataset (if specified).</td>
+    </tr>
+    <tr>
+        <th>name</th>
+        <td>Name of the training run (free text).</td>
+    </tr>
+    <tr>
+        <th>description</th>
+        <td>Description of the training run (free text).</td>
+    </tr>
+    <tr>
+        <th>start_training_time</th>
+        <td>Timestamp for start of training.</td>
+    </tr>
+    <tr>
+        <th>end_training_time</th>
+        <td>Timestamp for end of training.</td>
+    </tr>
+    <tr>
+        <th>madlib_version</th>
+        <td>Version of MADlib used.</td>
+    </tr>
+    <tr>
+        <th>num_classes</th>
+        <td>Count of distinct class values used.</td>
+    </tr>
+    <tr>
+        <th>class_values</th>
+        <td>Array of actual class values used.</td>
+    </tr>
+    <tr>
+        <th>dependent_vartype</th>
+        <td>Data type of the dependent variable.</td>
+    </tr>
+    <tr>
+        <th>normalizing_const</th>
+        <td>Normalizing constant used from the
+        image preprocessing step.</td>
+    </tr>
+   </table>
 
 @anchor hyperband_schedule
 @par Hyperband Schedule
@@ -143,3 +626,28 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.hyperband_schedule(
         schedule_loader.load()
 $$ LANGUAGE plpythonu VOLATILE
               m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_automl(
+    source_table                   VARCHAR,
+    model_output_table             VARCHAR,
+    model_arch_table               VARCHAR,
+    model_selection_table          VARCHAR,
+    model_id_list                  INTEGER[],
+    compile_params_grid            VARCHAR,
+    fit_params_grid                VARCHAR,
+    automl_method                  VARCHAR DEFAULT 'hyperband',
+    automl_params                  VARCHAR DEFAULT 'R=6, eta=3, skip_last=0',
+    random_state                   INTEGER DEFAULT NULL,
+    object_table                   VARCHAR DEFAULT NULL,
+    use_gpus                       BOOLEAN DEFAULT FALSE,
+    validation_table               VARCHAR DEFAULT NULL,
+    metrics_compute_frequency      INTEGER DEFAULT NULL,
+    name                           VARCHAR DEFAULT NULL,
+    description                    VARCHAR DEFAULT NULL
+) RETURNS VOID AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_automl')
+    with AOControl(False):
+        automl_runner = madlib_keras_automl.KerasAutoML(**globals())
+$$ LANGUAGE plpythonu VOLATILE
+    m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
index 67c8713..9d9fa60 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
@@ -117,8 +117,8 @@ class MstLoader():
         dict_dedup = {}
         for string in list_strs:
             d = convert_string_of_args_to_dict(string)
-            hash_tuple = tuple( '{0} = {1}'\
-            .format(x, d[x]) for x in sorted(d.keys()))
+            hash_tuple = tuple( '{0} = {1}' \
+                                .format(x, d[x]) for x in sorted(d.keys()))
             dict_dedup[hash_tuple] = string
 
         return dict_dedup.values()
@@ -281,8 +281,14 @@ class MstSearch():
         self.accepted_distributions = ['linear', 'log', 'log_near_one']
 
         # extracting python dict
-        self.compile_params_dict = literal_eval(compile_params_grid)
-        self.fit_params_dict = literal_eval(fit_params_grid)
+        try:
+            self.compile_params_dict = literal_eval(compile_params_grid)
+        except:
+            plpy.error("Invalid syntax in 'compile_params_dict'")
+        try:
+            self.fit_params_dict = literal_eval(fit_params_grid)
+        except:
+            plpy.error("Invalid syntax in 'fit_params_dict'")
         self.validate_inputs(compile_params_grid, fit_params_grid)
 
         self.msts = []
@@ -293,7 +299,7 @@ class MstSearch():
             # else should also suffice as random search is established.
             self.find_random_combinations()
 
-        #################
+        # param checks and validation
         compile_params_lst, fit_params_lst = [], []
         for i in self.msts:
             compile_params_lst.append(i[ModelSelectionSchema.COMPILE_PARAMS])
@@ -328,6 +334,11 @@ class MstSearch():
         :param fit_params_grid: The input string repr of fit params choices.
         """
 
+        for c in self.compile_params_dict:
+            _assert_equal(type(self.compile_params_dict[c]), list, "DL: compile param values must be specified in a list")
+        for f in self.fit_params_dict:
+            _assert_equal(type(self.fit_params_dict[f]), list, "DL: fit param values must be specified in a list")
+
         if ModelSelectionSchema.GRID_SEARCH.startswith(self.search_type.lower()):
             _assert(self.num_configs is None and self.random_state is None,
                     "DL: 'num_configs' and 'random_state' must be NULL for grid search")
@@ -335,7 +346,9 @@ class MstSearch():
                 _assert(distribution_type not in compile_params_grid and distribution_type not in fit_params_grid,
                         "DL: Cannot search from a distribution with grid search")
         elif ModelSelectionSchema.RANDOM_SEARCH.startswith(self.search_type.lower()):
-            _assert(self.num_configs is not None, "DL: 'num_configs' cannot be NULL for random search")
+            _assert(self.num_configs is not None and self.num_configs > 0,
+                    "DL: 'num_configs' must be a positive integer for random search")
         else:
             plpy.error("DL: 'search_type' must be either 'grid' or 'random'")
 
@@ -375,7 +388,8 @@ class MstSearch():
                 custom_fn_name.append(r[CustomFunctionSchema.FN_NAME])
         for compile_params in compile_params_lst:
             try:
-                _, _, res = parse_and_validate_compile_params(compile_params, [ModelSelectionSchema.OPTIMIZER_PARAMS_LIST])
+                _, _, res = parse_and_validate_compile_params(compile_params,
+                                                              [ModelSelectionSchema.OPTIMIZER_PARAMS_LIST])
                 # Validating if loss/metrics function called in compile_params
                 # is either defined in object table or is a built_in keras
                 # loss/metrics function
@@ -448,15 +462,13 @@ class MstSearch():
                 np.random.seed(self.random_state+seed_changes)
                 seed_changes += 1
             combination[ModelSelectionSchema.MODEL_ID] = np.random.choice(self.model_id_list)
-            compile_d = {}
-            compile_d, seed_changes = self.generate_param_config(self.compile_params_dict, compile_d, seed_changes)
-            combination[ModelSelectionSchema.COMPILE_PARAMS] = self.generate_row_string(compile_d)
-            fit_d = {}
-            fit_d, seed_changes = self.generate_param_config(self.fit_params_dict, fit_d, seed_changes)
-            combination[ModelSelectionSchema.FIT_PARAMS] = self.generate_row_string(fit_d)
+            compile_dict, seed_changes = self.generate_param_config(self.compile_params_dict, seed_changes)
+            combination[ModelSelectionSchema.COMPILE_PARAMS] = self.generate_row_string(compile_dict)
+            fit_dict, seed_changes = self.generate_param_config(self.fit_params_dict, seed_changes)
+            combination[ModelSelectionSchema.FIT_PARAMS] = self.generate_row_string(fit_dict)
             self.msts.append(combination)
 
-    def generate_param_config(self, params_dict, config_dict, seed_changes):
+    def generate_param_config(self, params_dict, seed_changes):
         """
         Generating a parameter configuration for random search.
         :param params_dict: Dictionary of params choices.
@@ -464,6 +476,7 @@ class MstSearch():
         :param seed_changes: Changes in seed for random sampling + reproducibility.
         :return: config_dict, seed_changes.
         """
+        config_dict = {}
         for cp in params_dict:
             if self.random_state:
                 np.random.seed(self.random_state+seed_changes)
@@ -477,7 +490,7 @@ class MstSearch():
                     if self.random_state:
                         np.random.seed(self.random_state+seed_changes)
                         seed_changes += 1
-                    opt_combination[i] = self.sample_val(cp, opt_values)
+                    opt_combination[i] = self.sample_val(i, opt_values)
                 config_dict[cp] = opt_combination
             else:
                 config_dict[cp] = self.sample_val(cp, param_values)
@@ -492,22 +505,23 @@ class MstSearch():
         :return: sampled value
         """
         # check if need to sample from a distribution
-        if param_value_list[-1] in self.accepted_distributions:
+        if type(param_value_list[-1]) == str and all([type(i) != str and not callable(i) for i in param_value_list[:-1]]) \
+                and len(param_value_list) > 1:
             _assert_equal(len(param_value_list), 3,
-                          "DL: {0} should have exactly 3 elements if picking from a distribution".format(cp))
+                          "DL: '{0}' should have exactly 3 elements if picking from a distribution".format(cp))
             _assert(param_value_list[1] > param_value_list[0],
-                    "DL: {0} should be of the format [lower_bound, upper_bound, distribution_type]".format(cp))
+                    "DL: '{0}' should be of the format [lower_bound, upper_bound, distribution_type]".format(cp))
             if param_value_list[-1] == 'linear':
                 return np.random.uniform(param_value_list[0], param_value_list[1])
             elif param_value_list[-1] == 'log':
                 return np.power(10, np.random.uniform(np.log10(param_value_list[0]),
-                                                                 np.log10(param_value_list[1])))
+                                                      np.log10(param_value_list[1])))
             elif param_value_list[-1] == 'log_near_one':
                 return 1.0 - np.power(10, np.random.uniform(np.log10(1.0-param_value_list[1]),
-                                                                       np.log10(1.0-param_value_list[0])))
+                                                            np.log10(1.0-param_value_list[0])))
             else:
-                plpy.error("DL: Please choose a valid distribution type {0}".format(
-                    tuple(self.accepted_distributions)))
+                plpy.error("DL: Please choose a valid distribution type for '{0}': {1}".format(
+                    cp, self.accepted_distributions))
         else:
             # random sampling
             return np.random.choice(param_value_list)
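For reference, the three distribution types accepted by sample_val above are
simple transformations of a uniform draw: 'linear' samples uniformly in
[lower, upper]; 'log' samples uniformly in log10 space (useful for learning
rates); 'log_near_one' samples 1 - x uniformly in log10 space (useful for
momentum values close to 1.0). A minimal standalone sketch of that logic,
assuming only NumPy (the helper name sample_from_spec is illustrative, not
part of the module):

    import numpy as np

    def sample_from_spec(spec, rng=np.random):
        # spec is either [lower_bound, upper_bound, distribution_type] or a
        # plain list of candidate values to choose from uniformly at random.
        if (len(spec) > 1 and isinstance(spec[-1], str)
                and all(not isinstance(v, str) for v in spec[:-1])):
            lo, hi, dist = spec  # exactly 3 elements, as asserted above
            if dist == 'linear':
                return rng.uniform(lo, hi)
            if dist == 'log':
                # uniform in log10 space, e.g. lr in [1e-4, 1e-3]
                return 10 ** rng.uniform(np.log10(lo), np.log10(hi))
            if dist == 'log_near_one':
                # uniform in log10(1 - x) space, e.g. momentum in [0.95, 0.99]
                return 1.0 - 10 ** rng.uniform(np.log10(1.0 - hi),
                                               np.log10(1.0 - lo))
            raise ValueError("unknown distribution type: {0}".format(dist))
        return rng.choice(spec)  # plain categorical sampling

For example, sample_from_spec([0.0001, 0.001, 'log']) draws a learning rate
whose base-10 exponent is uniform between -4 and -3.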
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index 01a27e3..fd18edb 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -392,21 +392,21 @@ SELECT madlib.generate_model_configs(
                                         'mst_table',          -- model selection table output
                                          ARRAY[1,2],          -- model ids from model architecture table
                                          $$
-                                            {'loss': ['categorical_crossentropy'], 
-                                             'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ], 
+                                            {'loss': ['categorical_crossentropy'],
+                                             'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.001, 0.01]} ],
                                              'metrics': ['accuracy']}
-                                         $$,                  -- compile_param_grid    
-                                         $$ 
+                                         $$,                  -- compile_param_grid
+                                         $$
                                          { 'batch_size': [64, 128],
-                                           'epochs': [10] 
-                                         } 
-                                         $$,                  -- fit_param_grid                                          
-                                         'grid'               -- search_type 
+                                           'epochs': [10]
+                                         }
+                                         $$,                  -- fit_param_grid
+                                         'grid'               -- search_type
                                          );
 SELECT * FROM mst_table ORDER BY mst_key;
 </pre>
 <pre class="result">
- mst_key | model_id |                                 compile_params                                  |        fit_params        
+ mst_key | model_id |                                 compile_params                                  |        fit_params
 ---------+----------+---------------------------------------------------------------------------------+--------------------------
        1 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=64
        2 |        1 | optimizer='Adam(lr=0.001)',metrics=['accuracy'],loss='categorical_crossentropy' | epochs=10,batch_size=128
@@ -426,7 +426,7 @@ SELECT * FROM mst_table ORDER BY mst_key;
       16 |        2 | optimizer='SGD(lr=0.01)',metrics=['accuracy'],loss='categorical_crossentropy'   | epochs=10,batch_size=128
 (16 rows)
 </pre>
-Note that above uses the same learning rate for the two optimizers. If you wanted to 
+Note that the above uses the same learning rate for the two optimizers. If you wanted to
 use different learning rates and different parameters for different optimizers (common):
 <pre class="example">
 DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -487,17 +487,17 @@ SELECT madlib.generate_model_configs(
                                         'mst_table',          -- model selection table output
                                          ARRAY[1,2],          -- model ids from model architecture table
                                          $$
-                                            {'loss': ['categorical_crossentropy'], 
-                                             'optimizer_params_list': [ 
-                                                 {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']}, 
-                                                 {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}], 
+                                            {'loss': ['categorical_crossentropy'],
+                                             'optimizer_params_list': [
+                                                 {'optimizer': ['SGD'], 'lr': [0.0001, 0.001, 'log'], 'momentum': [0.95, 0.99, 'log_near_one']},
+                                                 {'optimizer': ['Adam'], 'lr': [0.01, 0.1, 'log'], 'decay': [1e-6, 1e-4, 'log']}],
                                              'metrics': ['accuracy']}
-                                         $$,                  -- compile_param_grid    
-                                         $$ 
+                                         $$,                  -- compile_param_grid
+                                         $$
                                          { 'batch_size': [64, 128],
-                                           'epochs': [10] 
-                                         } 
-                                         $$,                  -- fit_param_grid                                          
+                                           'epochs': [10]
+                                         }
+                                         $$,                  -- fit_param_grid
                                          'random',            -- search_type
                                          20
                                          );
@@ -529,9 +529,9 @@ SELECT * FROM mst_table ORDER BY mst_key;
 (20 rows)
 </pre>
 
--# Incremental loading for more complex combinations.  If it is easier to generate the model configurations 
-incrementally rather than all at once, you can do that by not dropping the model selection table and associated 
-summary table, in which case the new model configurations will be appended to the existing table.  Here we combine two 
+-# Incremental loading for more complex combinations.  If it is easier to generate the model configurations
+incrementally rather than all at once, you can do that by not dropping the model selection table and associated
+summary table, in which case the new model configurations will be appended to the existing table.  Here we combine two
 of the previous examples into a single output table:
 <pre class="example">
 DROP TABLE IF EXISTS mst_table, mst_table_summary;
@@ -758,8 +758,29 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_model_selection_table(
     compile_params_list     VARCHAR[],
     fit_params_list         VARCHAR[]
 ) RETURNS VOID AS $$
-  SELECT MADLIB_SCHEMA.load_model_selection_table($1, $2, $3, $4, $5, NULL);
+SELECT MADLIB_SCHEMA.load_model_selection_table($1, $2, $3, $4, $5, NULL);
 $$ LANGUAGE sql VOLATILE
+    m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+/*
+--------------------------------------------------------------------------
+*/
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.generate_model_configs(
+    model_arch_table        VARCHAR,
+    model_selection_table   VARCHAR,
+    model_id_list           INTEGER[],
+    compile_params_grid     VARCHAR,
+    fit_params_grid         VARCHAR,
+    search_type             VARCHAR DEFAULT 'grid',
+    num_configs             INTEGER DEFAULT NULL,
+    random_state            INTEGER DEFAULT NULL,
+    object_table            VARCHAR DEFAULT NULL
+) RETURNS VOID AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_model_selection')
+    with AOControl(False):
+        mst_loader = madlib_keras_model_selection.MstSearch(**globals())
+        mst_loader.load()
+$$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
 
 /*
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index 0fac1bf..fac7357 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -554,7 +554,7 @@ class MstLoaderInputValidator():
         input_tbl_valid(self.model_arch_table, self.module_name)
         if self.object_table is not None:
             input_tbl_valid(self.object_table, self.module_name)
-        if self.module_name == 'load_model_selection_table':
+        if self.module_name in ('load_model_selection_table', 'madlib_keras_automl'):
             output_tbl_valid(self.model_selection_table, self.module_name)
             output_tbl_valid(self.model_selection_summary_table, self.module_name)
 
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 20d1574..780de8a 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -212,7 +212,9 @@ def parse_and_validate_compile_params(str_of_args, additional_params=[]):
     if len(additional_params) == 0:
         # keras compile() itself does not require an optimizer; we require it here only when it is not supplied via additional params
         _assert('optimizer' in compile_dict, "optimizer is a required parameter for compile")
-    opt_name, opt_args = parse_optimizer(compile_dict)
+        opt_name, opt_args = parse_optimizer(compile_dict)
+    else:
+        opt_name, opt_args = None, None
 
     _assert('loss' in compile_dict, "loss is a required parameter for compile")
     validate_compile_param_types(compile_dict)
@@ -221,18 +223,18 @@ def parse_and_validate_compile_params(str_of_args, additional_params=[]):
 
 def _validate_metrics(compile_dict):
     _assert('metrics' not in compile_dict.keys() or
-        compile_dict['metrics'] is None or
-        type(compile_dict['metrics']) is list,
-        "wrong input type for compile parameter metrics: multi-output model"
-        "and user defined metrics are not supported yet, please pass a list")
+            compile_dict['metrics'] is None or
+            type(compile_dict['metrics']) is list,
+            "wrong input type for compile parameter metrics: multi-output model"
+            "and user defined metrics are not supported yet, please pass a list")
     if 'metrics' in compile_dict and compile_dict['metrics']:
         unsupported_metrics_list = ['sparse_categorical_accuracy',
-            'sparse_categorical_crossentropy', 'top_k_categorical_accuracy',
-            'sparse_top_k_categorical_accuracy']
+                                    'sparse_categorical_crossentropy', 'top_k_categorical_accuracy',
+                                    'sparse_top_k_categorical_accuracy']
         _assert(len(compile_dict['metrics']) == 1,
-            "Only one metric at a time is supported.")
+                "Only one metric at a time is supported.")
         _assert(compile_dict['metrics'][0] not in unsupported_metrics_list,
-            "Metric {0} is not supported.".format(compile_dict['metrics'][0]))
+                "Metric {0} is not supported.".format(compile_dict['metrics'][0]))
 
 # Parse the optimizer name and params.
 def parse_optimizer(compile_dict):
@@ -280,7 +282,7 @@ def parse_and_validate_fit_params(fit_param_str):
         fit_params_dict = convert_string_of_args_to_dict(fit_param_str)
 
         literal_eval_fit_params = ['batch_size','epochs','verbose',
-                               'class_weight','initial_epoch','steps_per_epoch']
+                                   'class_weight','initial_epoch','steps_per_epoch']
         accepted_fit_params = literal_eval_fit_params + ['shuffle']
 
         fit_params_dict = validate_and_literal_eval_keys(fit_params_dict,
@@ -300,7 +302,7 @@ def parse_and_validate_fit_params(fit_param_str):
 def validate_and_literal_eval_keys(keys_dict, literal_eval_list, accepted_list):
     for ckey in keys_dict.keys():
         _assert(ckey in accepted_list,
-            "{0} is not currently accepted as a parameter. ".format(ckey))
+                "{0} is not currently accepted as a parameter. ".format(ckey))
         if ckey in literal_eval_list:
             try:
                 keys_dict[ckey] = ast.literal_eval(keys_dict[ckey])
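The first hunk in this file is the behavioral piece of the wrapper change:
when the caller passes additional accepted params (the AutoML/MstSearch path
passes OPTIMIZER_PARAMS_LIST), the optimizer is specified separately, so it
is neither required in compile_params nor parsed here; previously
parse_optimizer ran unconditionally and assumed 'optimizer' was always
present. A toy sketch of the new control flow, assuming the compile string
has already been parsed into a dict (function names here are illustrative,
not the module's API):

    # Toy reconstruction of the optimizer-resolution branch.
    def resolve_optimizer(compile_dict, additional_params=()):
        if not additional_params:
            # classic path: the optimizer must be embedded in compile_params
            assert 'optimizer' in compile_dict, \
                "optimizer is a required parameter for compile"
            return parse_optimizer_stub(compile_dict)
        # AutoML path: the optimizer comes from optimizer_params_list instead
        return None, None

    def parse_optimizer_stub(compile_dict):
        # e.g. "Adam(lr=0.001)" -> ('Adam', 'lr=0.001'); greatly simplified
        name, _, rest = compile_dict['optimizer'].partition('(')
        return name, rest.rstrip(')')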
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_automl.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_automl.sql_in
index c27c9f1..0516687 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_automl.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_automl.sql_in
@@ -21,6 +21,331 @@
 
 m4_include(`SQLCommon.m4')
 
+\i m4_regexp(MODULE_PATHNAME,
+             `\(.*\)libmadlib\.so',
+              `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
+)
+
+m4_changequote(`<!', `!>')
+m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!
+
+--------------------------- MADLIB KERAS AUTOML HYPERBAND TEST CASES ---------------------------
+
+-- test table dimensions / happy path
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=5, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=5, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+-- test invalid source table
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('invalid_source_table', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid source table');
+
+-- test preexisting output table
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+DROP TABLE IF EXISTS automl_mst_table, automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for preexisting output table');
+
+-- test preexisting selection table
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for preexisting selection table');
+
+-- test invalid model id
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[2,-1], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid model id');
+
+-- test invalid automl method
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperbrand', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl method');
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hb', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl method');
+
+-- test invalid automl params {R, eta, skip_last}
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=2, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl params');
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=0, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl params');
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=1, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl params');
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=3', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid automl params');
+
+-- test invalid object table
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, 'invalid_object_table', FALSE, NULL, NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid object table');
+
+-- test invalid validation table
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT assert(trap_error($TRAP$
+    SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+        ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+        'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+        'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+        'hyperband', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, 'invalid_validation_table', NULL, NULL, NULL);
+$TRAP$)=1, 'Should error out for invalid validation table');
+
+-- test automl_method value abbreviations
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyper', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyp', 'R=9, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+-- test automl_params values {R, eta, skip_last}
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=10, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=15, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=5, eta=3, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=5, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=5, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=10, eta=4, skip_last=1', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=4, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=4, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=5, eta=5, skip_last=0', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=7, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=7, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=2', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=9, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=9, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=11, eta=2, skip_last=3', NULL, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_mst_table_summary;
+SELECT assert(COUNT(*)=1, 'The length of the table does not match the inputs') FROM automl_output_summary;
+SELECT assert(COUNT(*)=8, 'The length of the table does not match the inputs') FROM automl_output;
+SELECT assert(COUNT(*)=8, 'The length of the table does not match the inputs') FROM automl_output_info;
+
+-- test name and description
+DROP TABLE IF EXISTS automl_output, automl_output_info, automl_output_summary, automl_mst_table,
+    automl_mst_table_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output', 'iris_model_arch', 'automl_mst_table',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=11, eta=2, skip_last=3', NULL, NULL, FALSE, NULL, NULL, 'test1', 'test1 descr');
+SELECT assert(name='test1' AND description='test1 descr',
+    'invalid name/description') FROM (SELECT * FROM automl_output_summary) summary;
+
+-- test config reproducibility
+DROP TABLE IF EXISTS automl_output1, automl_output1_info, automl_output1_summary, automl_mst_table1,
+    automl_mst_table1_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output1', 'iris_model_arch', 'automl_mst_table1',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=1', 42, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+DROP TABLE IF EXISTS automl_output2, automl_output2_info, automl_output2_summary, automl_mst_table2,
+    automl_mst_table2_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output2', 'iris_model_arch', 'automl_mst_table2',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=1', 42, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+DROP TABLE IF EXISTS automl_output3, automl_output3_info, automl_output3_summary, automl_mst_table3,
+    automl_mst_table3_summary;
+SELECT madlib_keras_automl('iris_data_packed', 'automl_output3', 'iris_model_arch', 'automl_mst_table3',
+    ARRAY[1,2], $${'loss': ['categorical_crossentropy'], 'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'],
+    'lr': [0.9, 0.95, 'log'], 'epsilon': [0.3, 0.5, 'log_near_one']}, {'optimizer': ['Adam', 'SGD'],
+    'lr': [0.6, 0.65, 'log']} ], 'metrics':['accuracy'] }$$, $${'batch_size': [2, 4], 'epochs': [3]}$$,
+    'hyperband', 'R=9, eta=3, skip_last=1', 42, NULL, FALSE, NULL, NULL, NULL, NULL);
+
+SELECT assert(model_id=(SELECT model_id FROM automl_output2_info WHERE mst_key=7) AND
+              compile_params=(SELECT compile_params FROM automl_output2_info WHERE mst_key=7) AND
+              fit_params=(SELECT fit_params FROM automl_output2_info WHERE mst_key=7), 'configs differ across runs with the same seed')
+FROM (SELECT model_id, compile_params, fit_params FROM automl_output1_info WHERE mst_key=7) output1;
+SELECT assert(model_id=(SELECT model_id FROM automl_output2_info WHERE mst_key=7) AND
+              compile_params=(SELECT compile_params FROM automl_output2_info WHERE mst_key=7) AND
+              fit_params=(SELECT fit_params FROM automl_output2_info WHERE mst_key=7), 'configs differ across runs with the same seed')
+FROM (SELECT model_id, compile_params, fit_params FROM automl_output3_info WHERE mst_key=7) output3;
+
 --------------------------- HYPERBAND SCHEDULE TEST CASES ---------------------------
 -- Testing happy path with default values
 DROP TABLE IF EXISTS schedule_table;
@@ -41,4 +366,4 @@ SELECT assert(trap_error($TRAP$
                81
         );
 $TRAP$)=1, 'Should error out if schedule_table already exists');
-
+!>)
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index 69e7da7..82b2647 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -23,12 +23,12 @@ m4_include(`SQLCommon.m4')
 
 \i m4_regexp(MODULE_PATHNAME,
              `\(.*\)libmadlib\.so',
-             `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
+              `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
 )
 
 \i m4_regexp(MODULE_PATHNAME,
              `\(.*\)libmadlib\.so',
-             `\1../../modules/deep_learning/test/madlib_keras_custom_function.setup.sql_in'
+              `\1../../modules/deep_learning/test/madlib_keras_custom_function.setup.sql_in'
 )
 
 ------------------------------------------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in
index 737cf38..15b3851 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in
@@ -26,6 +26,7 @@ import math
 sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
 sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
 
+import keras # still needed here even though not explicitly used. DO NOT REMOVE.
 import unittest
 from mock import *
 import plpy_mock as plpy
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
index 71ed047..dbd29ed 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_model_selection_table.py_in
@@ -325,7 +325,7 @@ class GenerateModelSelectionConfigsTestCase(unittest.TestCase):
         self.assertEqual(6, len(generate_mst2.msts))
 
     def tearDown(self):
-            self.module_patcher.stop()
+        self.module_patcher.stop()
 
 class LoadModelSelectionTableTestCase(unittest.TestCase):
     def setUp(self):
@@ -511,7 +511,7 @@ class MstLoaderInputValidatorTestCase(unittest.TestCase):
         self.subject._validate_model_ids = Mock()
         self.subject.parse_and_validate_fit_params = Mock()
         self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
-                                              {'name': 'custom_fn2'}]]
+                                               {'name': 'custom_fn2'}]]
         self.subject(
             self.model_selection_table,
             self.model_arch_table,