You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2021/01/12 17:19:11 UTC
[madlib] branch master updated: Remove AutoML dependency on
internals of fit_multiple
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new da9fce5 Remove AutoML dependency on internals of fit_multiple
da9fce5 is described below
commit da9fce524be12d8a4a68e7314bb7471449ff957c
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Wed Nov 18 15:31:58 2020 -0800
Remove AutoML dependency on internals of fit_multiple
AutoML should keep track of its own temporary table names, instead
of relying on assumptions about where they are stored inside the
FitMultipleModel class. These changes allow AutoML to co-exist
with the Model Hopper refactor (but are independent of that PR),
and should help make AutoML more robust to any future changes we
make inside FitMultipleModel.
---
.../deep_learning/madlib_keras_automl.py_in | 79 +++++++++++-----------
.../madlib_keras_automl_hyperband.py_in | 72 ++++++++++----------
.../madlib_keras_automl_hyperopt.py_in | 60 +++++++++-------
3 files changed, 111 insertions(+), 100 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
index c795ee1..1be2db5 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
@@ -27,6 +27,7 @@ from madlib_keras_model_selection import ModelSelectionSchema
from keras_model_arch_table import ModelArchSchema
from utilities.validate_args import table_exists, drop_tables, input_tbl_valid
from utilities.validate_args import quote_ident
+from madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
class AutoMLConstants:
BRACKET = 's'
@@ -40,9 +41,11 @@ class AutoMLConstants:
SKIP_LAST = 'skip_last'
HYPERBAND_PARAMS = [R, ETA, SKIP_LAST]
LOSS_METRIC = 'training_loss_final'
- TEMP_MST_TABLE = unique_string('temp_mst_table')
- TEMP_MST_SUMMARY_TABLE = add_postfix(TEMP_MST_TABLE, '_summary')
- TEMP_OUTPUT_TABLE = unique_string('temp_output_table')
+ MST_TABLE = unique_string('mst_table')
+ MST_SUMMARY_TABLE = add_postfix(MST_TABLE, '_summary')
+ MODEL_OUTPUT_TABLE = unique_string('output_table')
+ MODEL_SUMMARY_TABLE = add_postfix(MODEL_OUTPUT_TABLE, '_summary')
+ MODEL_INFO_TABLE = add_postfix(MODEL_OUTPUT_TABLE, '_info')
METRICS_ITERS = 'metrics_iters' # custom column
NUM_CONFIGS = 'num_configs'
NUM_ITERS = 'num_iterations'
@@ -79,6 +82,7 @@ class KerasAutoML(object):
self.model_id_list = sorted(list(set(model_id_list)))
self.compile_params_grid = compile_params_grid
self.fit_params_grid = fit_params_grid
+ self.dist_key_col = DISTRIBUTION_KEY_COLNAME
if object_table is not None:
object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
@@ -110,11 +114,21 @@ class KerasAutoML(object):
AutoMLConstants.LOSS_METRIC = 'validation_loss_final'
def create_model_output_table(self):
+ # TODO:
+ # In v1.17 we did not include a __dist_key__ column. This prevents
+ # gpdb from knowing which segments the models are on when they get
+ # warm started, adding unnecessary extra data movement each round.
+ # Including it in v1.18 as a first step toward fixing that. The plan is
+ # that we will use it to eliminate this overhead in FitMultiple
+ # (improving AutoML performance and warm start in general) in a
+ # future release that would no longer be able to warmstart from
+ # a 1.17 model output table.
output_table_create_query = """
CREATE TABLE {self.model_output_table}
({ModelSelectionSchema.MST_KEY} INTEGER PRIMARY KEY,
{ModelArchSchema.MODEL_WEIGHTS} BYTEA,
- {ModelArchSchema.MODEL_ARCH} JSON)
+ {ModelArchSchema.MODEL_ARCH} JSON,
+ {self.dist_key_col} INTEGER)
""".format(self=self, ModelSelectionSchema=ModelSelectionSchema,
ModelArchSchema=ModelArchSchema)
plpy.execute(output_table_create_query)
@@ -140,10 +154,8 @@ class KerasAutoML(object):
validation_metrics DOUBLE PRECISION[],
validation_loss DOUBLE PRECISION[],
{AutoMLSchema.METRICS_ITERS} INTEGER[])
- """.format(self=self,
- ModelSelectionSchema=ModelSelectionSchema,
- ModelArchSchema=ModelArchSchema,
- AutoMLSchema=AutoMLConstants)
+ """.format(self=self, ModelSelectionSchema=ModelSelectionSchema,
+ ModelArchSchema=ModelArchSchema, AutoMLSchema=AutoMLConstants)
plpy.execute(info_table_create_query)
def update_model_selection_table(self):
@@ -160,10 +172,9 @@ class KerasAutoML(object):
AutoMLSchema=AutoMLConstants,
ModelSelectionSchema=ModelSelectionSchema))
- def generate_model_output_summary_table(self, model_training):
+ def generate_model_output_summary_table(self):
"""
Creates and populates static values related to the AutoML workload.
- :param model_training: Fit Multiple function call object.
"""
#TODO this code is duplicated in create_model_summary_table
name = 'NULL' if self.name is None else '$MAD${0}$MAD$'.format(self.name)
@@ -172,17 +183,17 @@ class KerasAutoML(object):
random_state = 'NULL' if self.random_state is None else '$MAD${0}$MAD$'.format(self.random_state)
validation_table = 'NULL' if self.validation_table is None else '$MAD${0}$MAD$'.format(self.validation_table)
- create_query = plpy.prepare("""
+ create_query = """
CREATE TABLE {self.model_summary_table} AS
SELECT
$MAD${self.source_table}$MAD$::TEXT AS source_table,
{validation_table}::TEXT AS validation_table,
$MAD${self.model_output_table}$MAD$::TEXT AS model,
$MAD${self.model_info_table}$MAD$::TEXT AS model_info,
- (SELECT dependent_varname FROM {model_training.model_summary_table})
- AS dependent_varname,
- (SELECT independent_varname FROM {model_training.model_summary_table})
- AS independent_varname,
+ (SELECT dependent_varname FROM {a.MODEL_SUMMARY_TABLE})
+ AS dependent_varname,
+ (SELECT independent_varname FROM {a.MODEL_SUMMARY_TABLE})
+ AS independent_varname,
$MAD${self.model_arch_table}$MAD$::TEXT AS model_arch_table,
$MAD${self.model_selection_table}$MAD$::TEXT AS model_selection_table,
$MAD${self.automl_method}$MAD$::TEXT AS automl_method,
@@ -190,27 +201,20 @@ class KerasAutoML(object):
{random_state}::TEXT AS random_state,
{object_table}::TEXT AS object_table,
{self.use_gpus} AS use_gpus,
- (SELECT metrics_compute_frequency FROM {model_training.model_summary_table})::INTEGER
- AS metrics_compute_frequency,
+ (SELECT metrics_compute_frequency FROM {a.MODEL_SUMMARY_TABLE})::INTEGER
+ AS metrics_compute_frequency,
{name}::TEXT AS name,
{descr}::TEXT AS description,
'{self.start_training_time}'::TIMESTAMP AS start_training_time,
'{self.end_training_time}'::TIMESTAMP AS end_training_time,
- (SELECT madlib_version FROM {model_training.model_summary_table}) AS madlib_version,
- (SELECT num_classes FROM {model_training.model_summary_table})::INTEGER AS num_classes,
- (SELECT class_values FROM {model_training.model_summary_table}) AS class_values,
- (SELECT dependent_vartype FROM {model_training.model_summary_table})
- AS dependent_vartype,
- (SELECT normalizing_const FROM {model_training.model_summary_table})
- AS normalizing_const
- """.format(self=self,
- validation_table=validation_table,
- random_state=random_state,
- object_table=object_table,
- name=name,
- descr=descr,
- model_training=model_training))
-
+ (SELECT madlib_version FROM {a.MODEL_SUMMARY_TABLE}) AS madlib_version,
+ (SELECT num_classes FROM {a.MODEL_SUMMARY_TABLE})::INTEGER AS num_classes,
+ (SELECT class_values FROM {a.MODEL_SUMMARY_TABLE}) AS class_values,
+ (SELECT dependent_vartype FROM {a.MODEL_SUMMARY_TABLE})
+ AS dependent_vartype,
+ (SELECT normalizing_const FROM {a.MODEL_SUMMARY_TABLE})
+ AS normalizing_const
+ """.format(a=AutoMLConstants, **locals())
plpy.execute(create_query)
def is_automl_method(self, method_name):
@@ -261,13 +265,10 @@ class KerasAutoML(object):
', loss=' + str(loss_value) + '\n'
return res_str
- def remove_temp_tables(self, model_training):
+ def remove_temp_tables(self):
"""
Remove all intermediate tables created for AutoML runs/updates.
- :param model_training: Fit Multiple function call object.
"""
- if not model_training:
- return
- drop_tables([model_training.original_model_output_table, model_training.model_info_table,
- model_training.model_summary_table, AutoMLConstants.TEMP_MST_TABLE,
- AutoMLConstants.TEMP_MST_SUMMARY_TABLE])
+ drop_tables([AutoMLConstants.MODEL_OUTPUT_TABLE, AutoMLConstants.MODEL_INFO_TABLE,
+ AutoMLConstants.MODEL_SUMMARY_TABLE, AutoMLConstants.MST_TABLE,
+ AutoMLConstants.MST_SUMMARY_TABLE])
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperband.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperband.py_in
index 2d10f8c..2567b42 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperband.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperband.py_in
@@ -34,10 +34,8 @@ class HyperbandSchedule():
Attributes:
schedule_table (string): Name of output table containing hyperband schedule.
- R (int): Maximum number of resources (iterations) that can be allocated
- to a single configuration.
- eta (int): Controls the proportion of configurations discarded in
- each round of successive halving.
+ R (int): Maximum number of resources (iterations) that can be allocated to a single configuration.
+ eta (int): Controls the proportion of configurations discarded in each round of successive halving.
skip_last (int): The number of last rounds to skip.
"""
def __init__(self, schedule_table, R, eta=3, skip_last=0):
@@ -218,7 +216,7 @@ class AutoMLHyperband(KerasAutoML):
random_search.load() # for populating mst tables
# for creating the summary table for usage in fit multiple
- plpy.execute("CREATE TABLE {AutoMLSchema.TEMP_MST_SUMMARY_TABLE} AS " \
+ plpy.execute("CREATE TABLE {AutoMLSchema.MST_SUMMARY_TABLE} AS " \
"SELECT * FROM {random_search.model_selection_summary_table}".format(AutoMLSchema=AutoMLConstants,
random_search=random_search))
ranges_dict = self.mst_key_ranges_dict(initial_vals)
@@ -245,26 +243,26 @@ class AutoMLHyperband(KerasAutoML):
self.reconstruct_temp_mst_table(i, ranges_dict, configs_prune_lookup) # has keys to evaluate
active_keys = plpy.execute("SELECT {ModelSelectionSchema.MST_KEY} " \
- "FROM {AutoMLSchema.TEMP_MST_TABLE}".format(AutoMLSchema=AutoMLConstants,
+ "FROM {AutoMLSchema.MST_TABLE}".format(AutoMLSchema=AutoMLConstants,
ModelSelectionSchema=ModelSelectionSchema))
for k in active_keys:
i_dict[k[ModelSelectionSchema.MST_KEY]] += 1
self.warm_start = int(i != 0)
mcf = self.metrics_compute_frequency if self._is_valid_metrics_compute_frequency(num_iterations) else None
with SetGUC("plan_cache_mode", "force_generic_plan"):
- model_training = FitMultipleModel(self.schema_madlib, self.source_table, AutoMLConstants.TEMP_OUTPUT_TABLE,
- AutoMLConstants.TEMP_MST_TABLE, num_iterations, self.use_gpus,
+ model_training = FitMultipleModel(self.schema_madlib, self.source_table, AutoMLConstants.MODEL_OUTPUT_TABLE,
+ AutoMLConstants.MST_TABLE, num_iterations, self.use_gpus,
self.validation_table, mcf, self.warm_start, self.name, self.description)
- self.update_model_output_table(model_training)
- self.update_model_output_info_table(i, model_training, initial_vals)
+ self.update_model_output_table()
+ self.update_model_output_info_table(i, initial_vals)
self.print_best_mst_so_far()
self.end_training_time = get_current_timestamp(AutoMLConstants.TIME_FORMAT)
self.add_additional_info_cols(s_dict, i_dict)
self.update_model_selection_table()
- self.generate_model_output_summary_table(model_training)
- self.remove_temp_tables(model_training)
+ self.generate_model_output_summary_table()
+ self.remove_temp_tables()
def mst_key_ranges_dict(self, initial_vals):
"""
@@ -290,7 +288,7 @@ class AutoMLHyperband(KerasAutoML):
if i == 0:
_assert_equal(len(configs_prune_lookup), 1, "invalid args")
lower_bound, upper_bound = ranges_dict[self.s_max]
- plpy.execute("CREATE TABLE {AutoMLSchema.TEMP_MST_TABLE} AS SELECT * FROM {self.model_selection_table} "
+ plpy.execute("CREATE TABLE {AutoMLSchema.MST_TABLE} AS SELECT * FROM {self.model_selection_table} "
"WHERE {ModelSelectionSchema.MST_KEY} >= {lower_bound} " \
"AND {ModelSelectionSchema.MST_KEY} <= {upper_bound}".format(self=self,
AutoMLSchema=AutoMLConstants,
@@ -299,11 +297,11 @@ class AutoMLHyperband(KerasAutoML):
ModelSelectionSchema=ModelSelectionSchema))
return
# dropping and repopulating temp_mst_table
- drop_tables([AutoMLConstants.TEMP_MST_TABLE])
+ drop_tables([AutoMLConstants.MST_TABLE])
# {mst_key} changed from SERIAL to INTEGER for safe insertions and preservation of mst_key values
create_query = """
- CREATE TABLE {AutoMLSchema.TEMP_MST_TABLE} (
+ CREATE TABLE {AutoMLSchema.MST_TABLE} (
{mst_key} INTEGER,
{model_id} INTEGER,
{compile_params} VARCHAR,
@@ -322,7 +320,7 @@ class AutoMLHyperband(KerasAutoML):
for s_val in configs_prune_lookup:
lower_bound, upper_bound = ranges_dict[s_val]
if new_configs:
- query += "INSERT INTO {AutoMLSchema.TEMP_MST_TABLE} SELECT {ModelSelectionSchema.MST_KEY}, " \
+ query += "INSERT INTO {AutoMLSchema.MST_TABLE} SELECT {ModelSelectionSchema.MST_KEY}, " \
"{ModelSelectionSchema.MODEL_ID}, {ModelSelectionSchema.COMPILE_PARAMS}, " \
"{ModelSelectionSchema.FIT_PARAMS} FROM {self.model_selection_table} WHERE " \
"{ModelSelectionSchema.MST_KEY} >= {lower_bound} AND {ModelSelectionSchema.MST_KEY} <= " \
@@ -331,7 +329,7 @@ class AutoMLHyperband(KerasAutoML):
lower_bound=lower_bound, upper_bound=upper_bound)
new_configs = False
else:
- query += "INSERT INTO {AutoMLSchema.TEMP_MST_TABLE} SELECT {ModelSelectionSchema.MST_KEY}, " \
+ query += "INSERT INTO {AutoMLSchema.MST_TABLE} SELECT {ModelSelectionSchema.MST_KEY}, " \
"{ModelSelectionSchema.MODEL_ID}, {ModelSelectionSchema.COMPILE_PARAMS}, " \
"{ModelSelectionSchema.FIT_PARAMS} " \
"FROM {self.model_info_table} WHERE {ModelSelectionSchema.MST_KEY} >= {lower_bound} " \
@@ -342,15 +340,16 @@ class AutoMLHyperband(KerasAutoML):
configs_prune_lookup_val=configs_prune_lookup[s_val])
plpy.execute(query)
- def update_model_output_table(self, model_training):
+ def update_model_output_table(self):
"""
Updates gathered information of a hyperband diagonal run to the overall model output table.
- :param model_training: Fit Multiple function call object.
"""
# updates model weights for any previously trained configs
plpy.execute("UPDATE {self.model_output_table} a SET model_weights=" \
- "t.model_weights FROM {model_training.original_model_output_table} t " \
- "WHERE a.mst_key=t.mst_key".format(self=self, model_training=model_training))
+ "t.model_weights FROM {AutoMLSchema.MODEL_OUTPUT_TABLE} t " \
+ "WHERE a.{mst_key}=t.{mst_key}".format(self=self,
+ mst_key=ModelSelectionSchema.MST_KEY,
+ AutoMLSchema=AutoMLConstants))
# truncate and re-creates table to avoid memory blow-ups
with SetGUC("dev_opt_unsafe_truncate_in_subtransaction", "on"):
@@ -361,25 +360,25 @@ class AutoMLHyperband(KerasAutoML):
rename_table(self.schema_madlib, temp_model_table, self.model_output_table)
# inserts any newly trained configs
- plpy.execute("INSERT INTO {self.model_output_table} SELECT * FROM {model_training.original_model_output_table} " \
- "WHERE {model_training.original_model_output_table}.mst_key NOT IN " \
- "(SELECT {ModelSelectionSchema.MST_KEY} FROM {self.model_output_table})".format(self=self,
- model_training=model_training,
- ModelSelectionSchema=ModelSelectionSchema))
-
- def update_model_output_info_table(self, i, model_training, initial_vals):
+ plpy.execute("""
+ INSERT INTO {self.model_output_table} SELECT *
+ FROM {AutoMLSchema.MODEL_OUTPUT_TABLE}
+ WHERE {AutoMLSchema.MODEL_OUTPUT_TABLE}.mst_key NOT IN
+ ( SELECT {ModelSelectionSchema.MST_KEY} FROM {self.model_output_table} )
+ """.format(self=self, AutoMLSchema=AutoMLConstants, ModelSelectionSchema=ModelSelectionSchema)
+ )
+
+ def update_model_output_info_table(self, i, initial_vals):
"""
Updates gathered information of a hyperband diagonal run to the overall model output info table.
:param i: outer diagonal loop iteration.
- :param model_training: Fit Multiple function call object.
:param initial_vals: Dictionary of initial configurations and resources as part of the initial hyperband
schedule.
"""
# normalizing factor for metrics_iters due to warm start
epochs_factor = sum([n[1] for n in initial_vals.values()][::-1][:i]) # i & initial_vals args needed
iters = plpy.execute("SELECT {AutoMLSchema.METRICS_ITERS} " \
- "FROM {model_training.model_summary_table}".format(AutoMLSchema=AutoMLConstants,
- model_training=model_training))
+ "FROM {AutoMLSchema.MODEL_SUMMARY_TABLE}".format(AutoMLSchema=AutoMLConstants))
metrics_iters_val = [epochs_factor+mi for mi in iters[0]['metrics_iters']] # global iteration counter
validation_update_q = "validation_metrics_final=t.validation_metrics_final, " \
@@ -396,15 +395,16 @@ class AutoMLHyperband(KerasAutoML):
"training_metrics=a.training_metrics || t.training_metrics, " \
"training_loss=a.training_loss || t.training_loss, ".format(self=self) + validation_update_q +
"{AutoMLSchema.METRICS_ITERS}=a.metrics_iters || ARRAY{metrics_iters_val}::INTEGER[] " \
- "FROM {model_training.model_info_table} t " \
- "WHERE a.mst_key=t.mst_key".format(model_training=model_training, AutoMLSchema=AutoMLConstants,
- metrics_iters_val=metrics_iters_val))
+ "FROM {AutoMLSchema.MODEL_INFO_TABLE} t " \
+ "WHERE a.{mst_key}=t.{mst_key}".format(AutoMLSchema=AutoMLConstants,
+ mst_key=ModelSelectionSchema.MST_KEY,
+ metrics_iters_val=metrics_iters_val))
# inserts info about metrics and validation for newly trained model configs
plpy.execute("INSERT INTO {self.model_info_table} SELECT t.*, ARRAY{metrics_iters_val}::INTEGER[] AS metrics_iters " \
- "FROM {model_training.model_info_table} t WHERE t.mst_key NOT IN " \
+ "FROM {AutoMLSchema.MODEL_INFO_TABLE} t WHERE t.mst_key NOT IN " \
"(SELECT {ModelSelectionSchema.MST_KEY} FROM {self.model_info_table})".format(self=self,
- model_training=model_training,
+ AutoMLSchema=AutoMLConstants,
metrics_iters_val=metrics_iters_val,
ModelSelectionSchema=ModelSelectionSchema))
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperopt.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperopt.py_in
index 34d2e97..9825f76 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperopt.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_automl_hyperopt.py_in
@@ -134,7 +134,6 @@ class AutoMLHyperopt(KerasAutoML):
self.start_training_time = get_current_timestamp(AutoMLConstants.TIME_FORMAT)
metrics_elapsed_time_offset = 0
- model_training = None
for low, high in configs_lst:
i, n = low, high - low + 1
@@ -152,14 +151,14 @@ class AutoMLHyperopt(KerasAutoML):
model_id_list, compile_params, fit_params = self.extract_param_vals(sampled_params)
msts_list = self.generate_msts(model_id_list, compile_params, fit_params)
- self.remove_temp_tables(model_training)
+ self.remove_temp_tables()
self.populate_temp_mst_tables(i, msts_list)
plpy.info("***Evaluating {n} newly suggested model configurations***".format(n=n))
start_time = time.time()
with SetGUC("plan_cache_mode", "force_generic_plan"):
- model_training = FitMultipleModel(self.schema_madlib, self.source_table, AutoMLConstants.TEMP_OUTPUT_TABLE,
- AutoMLConstants.TEMP_MST_TABLE, self.num_iters, self.use_gpus, self.validation_table,
+ model_training = FitMultipleModel(self.schema_madlib, self.source_table, AutoMLConstants.MODEL_OUTPUT_TABLE,
+ AutoMLConstants.MST_TABLE, self.num_iters, self.use_gpus, self.validation_table,
self.metrics_compute_frequency, False, self.name, self.description,
metrics_elapsed_time_offset=metrics_elapsed_time_offset)
metrics_elapsed_time_offset += time.time() - start_time
@@ -169,7 +168,7 @@ class AutoMLHyperopt(KerasAutoML):
# HyperOpt TPE update
for k, hyperopt_param in enumerate(hyperopt_params, i):
- loss_val = plpy.execute("SELECT {AutoMLSchema.LOSS_METRIC} FROM {model_training.model_info_table} " \
+ loss_val = plpy.execute("SELECT {AutoMLSchema.LOSS_METRIC} FROM {AutoMLSchema.MODEL_INFO_TABLE} " \
"WHERE {ModelSelectionSchema.MST_KEY}={k}".format(AutoMLSchema=AutoMLConstants,
ModelSelectionSchema=ModelSelectionSchema,
**locals()))[0][AutoMLConstants.LOSS_METRIC]
@@ -180,14 +179,14 @@ class AutoMLHyperopt(KerasAutoML):
trials.refresh()
# stacks info of all model configs together
- self.update_model_output_and_info_tables(model_training)
+ self.update_model_output_and_info_tables()
self.print_best_mst_so_far()
self.end_training_time = get_current_timestamp(AutoMLConstants.TIME_FORMAT)
self.update_model_selection_table()
- self.generate_model_output_summary_table(model_training)
- self.remove_temp_tables(model_training)
+ self.generate_model_output_summary_table()
+ self.remove_temp_tables()
def get_configs_list(self, num_configs, num_segments):
"""
@@ -351,11 +350,11 @@ class AutoMLHyperopt(KerasAutoML):
:param msts_list: list of generated msts.
"""
# extra sanity check
- if table_exists(AutoMLConstants.TEMP_MST_TABLE):
- drop_tables([AutoMLConstants.TEMP_MST_TABLE])
+ if table_exists(AutoMLConstants.MST_TABLE):
+ drop_tables([AutoMLConstants.MST_TABLE])
create_query = """
- CREATE TABLE {AutoMLSchema.TEMP_MST_TABLE} (
+ CREATE TABLE {AutoMLSchema.MST_TABLE} (
{mst_key} INTEGER,
{model_id} INTEGER,
{compile_params} VARCHAR,
@@ -375,7 +374,7 @@ class AutoMLHyperopt(KerasAutoML):
fit_params = mst[ModelSelectionSchema.FIT_PARAMS]
insert_query = """
INSERT INTO
- {AutoMLSchema.TEMP_MST_TABLE}(
+ {AutoMLSchema.MST_TABLE}(
{mst_key_col},
{model_id_col},
{compile_params_col},
@@ -396,7 +395,7 @@ class AutoMLHyperopt(KerasAutoML):
mst_key_val += 1
plpy.execute(insert_query)
- self.generate_mst_summary_table(AutoMLConstants.TEMP_MST_SUMMARY_TABLE)
+ self.generate_mst_summary_table(AutoMLConstants.MST_SUMMARY_TABLE)
def generate_mst_summary_table(self, tbl_name):
"""
@@ -438,21 +437,32 @@ class AutoMLHyperopt(KerasAutoML):
**locals())
plpy.execute(insert_summary_query)
- def update_model_output_and_info_tables(self, model_training):
+ def update_model_output_and_info_tables(self):
"""
Updates model output and info tables by stacking rows after each evaluation round.
- :param model_training: Fit Multiple class object
"""
- metrics_iters = plpy.execute("SELECT {AutoMLSchema.METRICS_ITERS} " \
- "FROM {model_training.original_model_output_table}_summary".format(self=self,
- model_training=model_training,
- AutoMLSchema=AutoMLConstants))[0][AutoMLConstants.METRICS_ITERS]
+ metrics_iters = plpy.execute("""
+ SELECT {AutoMLSchema.METRICS_ITERS}
+ FROM {AutoMLSchema.MODEL_SUMMARY_TABLE}
+ """.format(self=self, AutoMLSchema=AutoMLConstants))[0][AutoMLConstants.METRICS_ITERS]
if metrics_iters:
metrics_iters = "ARRAY{0}".format(metrics_iters)
+
# stacking new rows from training
- plpy.execute("INSERT INTO {self.model_output_table} SELECT * FROM " \
- "{model_training.original_model_output_table}".format(self=self, model_training=model_training))
- plpy.execute("INSERT INTO {self.model_info_table} SELECT *, {metrics_iters} FROM " \
- "{model_training.model_info_table}".format(self=self,
- model_training=model_training,
- metrics_iters=metrics_iters))
+ plpy.execute("""
+ INSERT INTO {self.model_output_table}
+ SELECT * FROM {AutoMLConstants.MODEL_OUTPUT_TABLE}
+ """.format(self=self,
+ AutoMLConstants=AutoMLConstants
+ )
+ )
+
+ plpy.execute("""
+ INSERT INTO {self.model_info_table}
+ SELECT *, {metrics_iters}
+ FROM {AutoMLConstants.MODEL_INFO_TABLE}
+ """.format(self=self,
+ AutoMLConstants=AutoMLConstants,
+ metrics_iters=metrics_iters
+ )
+ )