You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/12/17 18:20:56 UTC
[madlib] 02/02: DL: Update user docs for multi-model
This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit ca17da425ca124d9e39f0249a0336321f1d64f07
Author: Frank McQuillan <fm...@pivotal.io>
AuthorDate: Fri Dec 13 17:41:57 2019 -0800
DL: Update user docs for multi-model
---
doc/mainpage.dox.in | 16 +-
.../deep_learning/input_data_preprocessor.sql_in | 70 +-
.../deep_learning/keras_model_arch_table.sql_in | 23 +-
.../modules/deep_learning/madlib_keras.sql_in | 82 +-
.../madlib_keras_fit_multiple_model.sql_in | 1335 ++++++++++++++++++++
.../deep_learning/madlib_keras_gpu_info.sql_in | 2 +-
.../madlib_keras_model_selection.sql_in | 6 +-
7 files changed, 1454 insertions(+), 80 deletions(-)
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index 38b61c5..0e7b426 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -13,12 +13,11 @@ Useful links:
<li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
<li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
<li>User documentation for earlier releases:
- <a href="../v1.16/index.html">v1.15</a>,
+ <a href="../v1.16/index.html">v1.16</a>,
<a href="../v1.15.1/index.html">v1.15.1</a>,
<a href="../v1.15/index.html">v1.15</a>,
<a href="../v1.14/index.html">v1.14</a>,
- <a href="../v1.13/index.html">v1.13</a>,
- <a href="../v1.12/index.html">v1.12</a>
+ <a href="../v1.13/index.html">v1.13</a>
</li>
</ul>
@@ -292,9 +291,16 @@ Interface and implementation are subject to change.
@{
@defgroup grp_gpu_configuration GPU Configuration
@defgroup grp_keras Keras
- @defgroup grp_keras_model_arch Load Model
+ @defgroup grp_keras_model_arch Load Models
+ @defgroup grp_model_selection Model Selection
+ @brief Train multiple deep learning models at the same time.
+ @details Train multiple deep learning models at the same time.
+ @{
+ @defgroup grp_automl AutoML
+ @defgroup grp_keras_run_model_selection Run Model Selection
+ @defgroup grp_keras_setup_model_selection Setup Model Selection
+ @}
@defgroup grp_input_preprocessor_dl Preprocessor for Images
- @defgroup grp_keras_model_selection Setup Model Selection
@}
@defgroup grp_bayes Naive Bayes Classification
@defgroup grp_sample Random Sampling
diff --git a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.sql_in b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.sql_in
index 6d5f0ad..ddc356f 100644
--- a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.sql_in
+++ b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.sql_in
@@ -44,9 +44,14 @@ Interface and implementation are subject to change. </em>
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
-For deep learning based techniques such as
-convolutional neural nets, the input
-data is often images. These images can be
+This preprocessor is a utility that prepares image data for use
+by frameworks like Keras and TensorFlow that support mini-batching
+as an optimization option. The advantage of using mini-batching is that
+it can perform better than stochastic gradient descent
+because it uses more than one training example at a time, typically
+resulting in faster and smoother convergence [1].
+
+Images can be
represented as an array of numbers
where each element represents grayscale,
RGB or other channel values for each
@@ -114,16 +119,20 @@ training_preprocessor_dl(source_table,
</dd>
<dt>buffer_size (optional)</dt>
- <dd>INTEGER, default: computed. Buffer size is the
- number of rows from the
- source table that are packed into one row of the preprocessor
- output table. The default value is computed considering size of
- the source table, number of independent variables,
- and number of segments in the database cluster.
- @note The preprocessor tries to pack data and distribute it
- evenly based on the number of input rows. Sometimes you won't
- necessarily get the exact number of
- rows specified by the 'buffer_size' parameter.
+ <dd>INTEGER, default: computed. Buffer size is the number of rows from
+ the source table that are packed into one row of the preprocessor output
+ table. In the case of images, the source table will have one image per row,
+ and the output table will have multiple images per row. The default value is
+ computed considering the sizes of the source table and images and the number
+ of segments in the database cluster.
+ @note Using the default for 'buffer_size' will produce buffers that are relatively
+ large, which generally results in the fastest fit() runtime with Keras. Setting a
+ smaller buffer size may cause the preprocessor to run faster (although this is not
+ guaranteed, since it depends on database cluster size, data set, and other factors).
+ But since preprocessing is usually a one-time operation and fit() is called many times,
+ by default buffer sizes are optimized to make fit() as fast as possible. Note that
+ specifying a 'buffer_size' does not guarantee that exact value will be used. Actual buffer
+ size is adjusted to avoid data skew, which adversely impacts fit() runtime.
</dd>
<dt>normalizing_const (optional)</dt>
@@ -152,7 +161,7 @@ training_preprocessor_dl(source_table,
You can also specify the name of a resources table containing the segments
to be used for training. This table must contain a column called 'dbid' that
- specifies the segment id from the 'gp_segment_configuration' table [1].
+ specifies the segment id from the 'gp_segment_configuration' table [2].
Refer to the utility function <a href="group__grp__gpu__configuration.html">GPU Configuration</a>
for more information on how to
identify segments attached to hosts that are GPU enabled.
@@ -214,16 +223,20 @@ validation_preprocessor_dl(source_table,
</dd>
<dt>buffer_size (optional)</dt>
- <dd>INTEGER, default: computed. Buffer size is the
- number of rows from the
- source table that are packed into one row of the preprocessor
- output table. The default value is computed considering size of
- the source table, number of independent variables,
- and number of segments in the database cluster.
- @note The preprocessor tries to pack data and distribute it
- evenly based on the number of input rows. Sometimes you won't
- necessarily get the exact number of
- rows specified in by the 'buffer_size' parameter.
+ <dd>INTEGER, default: computed. Buffer size is the number of rows from
+ the source table that are packed into one row of the preprocessor output
+ table. In the case of images, the source table will have one image per row,
+ and the output table will have multiple images per row. The default value is
+ computed considering the sizes of the source table and images and the number
+ of segments in the database cluster.
+ @note Using the default for 'buffer_size' will produce buffers that are relatively
+ large, which generally results in the fastest fit() runtime with Keras. Setting a
+ smaller buffer size may cause the preprocessor to run faster (although this is not
+ guaranteed, since it depends on database cluster size, data set, and other factors).
+ But since preprocessing is usually a one-time operation and fit() is called many times,
+ by default buffer sizes are optimized to make fit() as fast as possible. Note that
+ specifying a 'buffer_size' does not guarantee that exact value will be used. Actual buffer
+ size is adjusted to avoid data skew, which adversely impacts fit() runtime.
</dd>
<dt>distribution_rules (optional)</dt>
@@ -239,7 +252,7 @@ validation_preprocessor_dl(source_table,
You can also specify the name of a resources table containing the segments
to be used for training. This table must contain a column called 'dbid' that
- specifies the segment id from the 'gp_segment_configuration' table [1].
+ specifies the segment id from the 'gp_segment_configuration' table [2].
Refer to the utility function <a href="group__grp__gpu__configuration.html">GPU Configuration</a>
for more information on how to
identify segments attached to hosts that are GPU enabled.
@@ -350,7 +363,7 @@ both validation_preprocessor_dl() and training_preprocessor_dl() ):
<th>__internal_gpu_config__</th>
<td>For internal use. (Note: this is the list of segment id's
where data is distributed in the form of 'content' id, which
- is different from 'dbid' [1].)</td>
+ is different from 'dbid' [2].)</td>
</tr>
</table>
@@ -832,7 +845,10 @@ __internal_gpu_config__ | {0,1}
@anchor references
@par References
-[1] Greenplum 'gp_segment_configuration' table https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html
+[1] "Neural Networks for Machine Learning", Lectures 6a and 6b on mini-batch gradient descent,
+Geoffrey Hinton with Nitish Srivastava and Kevin Swersky, http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+
+[2] Greenplum 'gp_segment_configuration' table https://gpdb.docs.pivotal.io/latest/ref_guide/system_catalogs/gp_segment_configuration.html
@anchor related
@par Related Topics
diff --git a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in
index b07900e..b1bf150 100644
--- a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in
+++ b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.sql_in
@@ -30,8 +30,7 @@ m4_include(`SQLCommon.m4')
/**
@addtogroup grp_keras_model_arch
-@brief Utility function to load model architectures and weights into a table for
-use by deep learning algorithms.
+@brief Utility function to load model architectures and weights into a table.
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>
@@ -109,12 +108,22 @@ load_keras_model(
</tr>
<tr>
<th>model_weights</th>
- <td>bytea. Weights of the model which may be used for warm start
+ <td>BYTEA. Weights of the model which may be used for warm start
or transfer learning.
Weights are stored as a PostgreSQL binary data type.
</td>
</tr>
<tr>
+ <th>name</th>
+ <td>TEXT. Name of model (free text).
+ </td>
+ </tr>
+ <tr>
+ <th>description</th>
+ <td>TEXT. Description of model (free text).
+ </td>
+ </tr>
+ <tr>
<th>__internal_madlib_id__</th>
<td>TEXT. Unique id for model arch. This is an id used internally by MADlib.
</td>
@@ -245,8 +254,8 @@ model.add(Dense(3, activation='softmax'))
\#
\# get weights, flatten and serialize
weights = model.get_weights()
-weights_flat = [ w.flatten() for w in weights ]
-weights1d = np.array([j for sub in weights_flat for j in sub])
+weights_flat = [w.flatten() for w in weights]
+weights1d = np.concatenate(weights_flat).ravel()
weights_bytea = weights1d.tostring()
\#
\# load query
@@ -288,8 +297,8 @@ model.add(Dense(3, activation='softmax'))
\#
\# get weights, flatten and serialize
weights = model.get_weights()
-weights_flat = [ w.flatten() for w in weights ]
-weights1d = np.array([j for sub in weights_flat for j in sub])
+weights_flat = [w.flatten() for w in weights]
+weights1d = np.concatenate(weights_flat).ravel()
weights_bytea = psycopg2.Binary(weights1d.tostring())
\#
\# load query
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index 7de95bc..6127031 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -31,6 +31,8 @@ m4_include(`SQLCommon.m4')
/**
@addtogroup grp_keras
+@brief Fit, evaluate and predict using the Keras API.
+
<div class="toc"><b>Contents</b><ul>
<li class="level1"><a href="#keras_fit">Fit</a></li>
<li class="level1"><a href="#keras_evaluate">Evaluate</a></li>
@@ -53,8 +55,11 @@ Keras was developed for fast experimentation. It can run
on top of different backends and the one that is currently
supported by MADlib is TensorFlow [2]. The implementation
in MADlib is distributed and designed to train
-a single large model across multiple segments (workers)
-in a Greenplum database. PostgreSQL is also supported.
+a single model across multiple segments (workers)
+in Greenplum database. (PostgreSQL is also supported.)
+Alternatively, to train multiple models at the same time for model
+architecture search or hyperparameter tuning, you can
+use <a href="group__grp__keras__run__model__selection.html">Model Selection</a>.
The main use case is image classification
using sequential models, which are made up of a
@@ -74,16 +79,14 @@ can perform better than stochastic gradient descent
because it uses more than one training example at a time,
typically resulting in faster and smoother convergence [3].
-You can also do inference on models that have not been trained with MADlib,
-but rather imported from an external source.
+You can also do inference on models that have not been trained in MADlib,
+but rather imported from an external source. This is in the section
+called "Predict BYOM" below, where "BYOM" stands for "Bring Your Own Model."
Note that the following MADlib functions are targeting a specific Keras
version (2.2.4) with a specific Tensorflow kernel version (1.14).
Using a newer or older version may or may not work as intended.
-@brief Solves image classification problems by calling
-the Keras API
-
@anchor keras_fit
@par Fit
The fit (training) function has the following format:
@@ -171,19 +174,19 @@ madlib_keras_fit(
</DD>
<DT>use_gpus (optional)</DT>
- <DD>BOOLEAN, default: FALSE (i.e., CPU).
- Flag to enable GPU support for training neural network.
- The number of GPUs to use is determined by the parameters
- passed to the preprocessor.
+ <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs
+ are to be used for training the neural network. Set to TRUE to use GPUs.
@note
- We have seen some memory related issues when segments
- share GPU resources.
- For example, if you provide 1 GPU and your
- database cluster is set up to have 4
+ This parameter must not conflict with how the distribution rules are set in
+ the preprocessor function. For example, if you set a distribution rule to use
+ certain segments on hosts that do not have GPUs attached, you will get an error
+ if you set 'use_gpus' to TRUE. Also, we have seen some memory related issues
+ when segments share GPU resources.
+ For example, if you have 1 GPU per segment host and your cluster has 4
segments per segment host, it means that all 4
- segments on a segment host will share the same
- GPU. The current recommended
+ segments will share the same
+ GPU on each host. The current recommended
configuration is 1 GPU per segment.
</DD>
@@ -498,27 +501,29 @@ madlib_keras_evaluate(
<td>Type of metric that was used in the training step.</td>
</tr>
-
<DT>use_gpus (optional)</DT>
- <DD>BOOLEAN, default: FALSE (i.e., CPU).
- Flag to enable GPU support for evaluating neural network.
- The number of GPUs to use is determined by the parameters
- passed to the preprocessor.
+ <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs
+ are to be used for evaluating the neural network. Set to TRUE to use GPUs.
@note
- We have seen some memory related issues when segments
- share GPU resources.
- For example, if you provide 1 GPU and your
- database cluster is set up to have 4
+ This parameter must not conflict with how the distribution rules are set in
+ the preprocessor function. For example, if you set a distribution rule to use
+ certain segments on hosts that do not have GPUs attached, you will get an error
+ if you set 'use_gpus' to TRUE. Also, we have seen some memory related issues
+ when segments share GPU resources.
+ For example, if you have 1 GPU per segment host and your cluster has 4
segments per segment host, it means that all 4
- segments on a segment host will share the same
- GPU. The current recommended
+ segments will share the same
+ GPU on each host. The current recommended
configuration is 1 GPU per segment.
</DD>
-
<DT>mst_key (optional)</DT>
- <DD>INTEGER, default: NULL. To be filled.
+ <DD>INTEGER, default: NULL. ID that defines a unique tuple for
+ model architecture-compile parameters-fit parameters in a model
+ selection table. Do not use this if training one model at a time using madlib_keras_fit().
+ See the <a href="group__grp__keras__run__model__selection.html">Model Selection</a> section
+ for more details on model selection by training multiple models at a time.
</DD>
</DL>
@@ -599,7 +604,7 @@ madlib_keras_predict(
and 'prob' gives the probability value for each class.
</DD>
- <DT>use_gpus(optional)</DT>
+ <DT>use_gpus (optional)</DT>
<DD>BOOLEAN, default: FALSE (i.e., CPU).
Flag to enable GPU support for training neural network.
The number of GPUs to use is determined by the parameters
@@ -617,12 +622,15 @@ madlib_keras_predict(
</DD>
<DT>mst_key (optional)</DT>
- <DD>INTEGER, default: NULL. To be filled.
+ <DD>INTEGER, default: NULL. ID that defines a unique tuple for
+ model architecture-compile parameters-fit parameters in a model
+ selection table. Do not use this if training one model at a time using madlib_keras_fit().
+ See the <a href="group__grp__keras__run__model__selection.html">Model Selection</a> section
+ for more details on model selection by training multiple models at a time.
</DD>
</DL>
-
@anchor keras_predict_byom
@par Predict BYOM (bring your own model)
The predict BYOM function allows you to do inference on models that
@@ -709,7 +717,7 @@ madlib_keras_predict_byom(
gives the actual prediction and 'prob' gives the probability value for each class.
</DD>
- <DT>use_gpus(optional)</DT>
+ <DT>use_gpus (optional)</DT>
<DD>BOOLEAN, default: FALSE (i.e., CPU).
Flag to enable GPU support for training neural network.
The number of GPUs to use is determined by the parameters
@@ -1193,7 +1201,7 @@ SELECT madlib.madlib_keras_predict_byom('model_arch_library', -- model arch tab
'attributes', -- independent var
'iris_predict_byom', -- output table
'response', -- prediction type
- 0, -- gpus per host
+ FALSE, -- use GPUs
ARRAY['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], -- class values
1.0 -- normalizing const
);
@@ -1278,7 +1286,7 @@ SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
10, -- num_iterations
- 0, -- GPUs per host
+ FALSE, -- use GPUs
'iris_test_packed', -- validation dataset
3, -- metrics compute frequency
FALSE, -- warm start
@@ -1384,7 +1392,7 @@ SELECT madlib.madlib_keras_fit('iris_train_packed', -- source table
$$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$, -- compile_params
$$ batch_size=5, epochs=3 $$, -- fit_params
5, -- num_iterations
- 0, -- GPUs per host
+ FALSE, -- use GPUs
'iris_test_packed', -- validation dataset
1, -- metrics compute frequency
TRUE, -- warm start
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
index d1c261a..1ddbd18 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
@@ -28,6 +28,1341 @@
m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_keras_run_model_selection
+
+@brief Explore network architectures and hyperparameters by training many models at a time.
+
+<div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#keras_fit">Fit</a></li>
+<li class="level1"><a href="#keras_evaluate">Evaluate</a></li>
+<li class="level1"><a href="#keras_predict">Predict</a></li>
+<li class="level1"><a href="#example">Examples</a></li>
+<li class="level1"><a href="#notes">Notes</a></li>
+<li class="level1"><a href="#background">Technical Background</a></li>
+<li class="level1"><a href="#literature">Literature</a></li>
+<li class="level1"><a href="#related">Related Topics</a></li>
+</ul></div>
+
+\warning <em> This MADlib method is still in early stage development.
+Interface and implementation are subject to change. </em>
+
+This module allows you to explore network architectures and
+hyperparameters by training many models at a time across the
+database cluster. The aim is to support efficient empirical comparison of multiple
+training configurations. This process is called model selection,
+and the implementation here is based on a parallel execution strategy
+called model hopper parallelism (MOP) [1,2].
+
+Models are designed in Keras [3], which is a high-level neural
+network API written in Python. It can run
+on top of different backends and the one that is currently
+supported by MADlib is TensorFlow [4].
+
+The main use case is image classification
+using sequential models, which are made up of a
+linear stack of layers. This includes multilayer perceptrons (MLPs)
+and convolutional neural networks (CNNs). Regression is not
+currently supported.
+
+Before doing model selection in MADlib you will need to run
+the mini-batch preprocessor, and create a table with the various models
+and hyperparameters to try.
+
+You can mini-batch the training and evaluation datasets by using the
+<a href="group__grp__input__preprocessor__dl.html">Preprocessor
+for Images</a> which is a utility that prepares image data for
+use by models that support mini-batch as an optimization option.
+This is a one-time operation and you would only
+need to re-run the preprocessor if your input data has changed.
+The advantage of using mini-batching is that it
+can perform better than stochastic gradient descent
+because it uses more than one training example at a time,
+typically resulting in faster and smoother convergence [5].
+The input preprocessor also sets the distribution rules
+for the training data. For example, you may only want
+to train models on segments that reside on hosts that are GPU enabled.
+
+You can set up the models and hyperparameters to try with the
+<a href="group__grp__keras__setup__model__selection.html">Setup
+Model Selection</a> utility to define the unique combinations
+of model architectures, compile and fit parameters.
+
+@anchor keras_fit
+@par Fit
+The fit (training) function has the following format:
+
+<pre class="syntax">
+madlib_keras_fit_multiple_model(
+ source_table,
+ model_output_table,
+ model_selection_table,
+ num_iterations,
+ use_gpus,
+ validation_table,
+ metrics_compute_frequency,
+ warm_start,
+ name,
+ description
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+ <dt>source_table</dt>
+ <dd>TEXT. Name of the table containing the training data.
+ This is the name of the output
+ table from the image preprocessor. Independent
+ and dependent variables are specified in the preprocessor
+ step which is why you do not need to explicitly state
+ them here as part of the fit function.</dd>
+
+ <dt>model_output_table</dt>
+ <dd>TEXT. Name of the output table containing the
+ multiple models created.
+ Details of output tables are shown below.
+ </dd>
+
+ <dt>model_selection_table</dt>
+ <dd>TEXT. Name of the table containing model selection parameters to be tried.
+ Here we mean both hyper-parameter tuning and model architecture search.
+ </dd>
+
+ <DT>num_iterations</DT>
+ <DD>INTEGER. Number of iterations to train.
+ </DD>
+
+ <DT>use_gpus (optional)</DT>
+ <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs
+ are to be used for training the neural network. Set to TRUE to use GPUs.
+
+ @note
+ This parameter must not conflict with how the distribution rules are set in
+ the preprocessor function. For example, if you set a distribution rule to use
+ certain segments on hosts that do not have GPUs attached, you will get an error
+ if you set 'use_gpus' to TRUE. Also, we have seen some memory related issues
+ when segments share GPU resources.
+ For example, if you have 1 GPU per segment host and your cluster has 4
+ segments per segment host, it means that all 4
+ segments will share the same
+ GPU on each host. The current recommended
+ configuration is 1 GPU per segment.
+ </DD>
+
+ <dt>validation_table (optional)</dt>
+ <dd>TEXT, default: none. Name of the table containing
+ the validation dataset.
+ Note that the validation dataset must be preprocessed
+ in the same way as the training dataset, so this
+ is the name of the output
+ table from running the image preprocessor on the validation dataset.
+ Using a validation dataset can mean a
+ longer training time, depending on its size.
+ This can be controlled using the 'metrics_compute_frequency'
+ parameter described below.</dd>
+
+ <DT>metrics_compute_frequency (optional)</DT>
+ <DD>INTEGER, default: once at the end of training
+ after 'num_iterations'. Frequency to compute per-iteration
+ metrics for the training dataset and validation dataset
+ (if specified). There can be considerable cost to
+ computing metrics every iteration, especially if the
+ training dataset is large. This parameter is a way of
+ controlling the frequency of those computations.
+ For example, if you specify 5, then metrics will be computed
+ every 5 iterations as well as at the end of training
+ after 'num_iterations'. If you use the default,
+ metrics will be computed only
+ once after 'num_iterations' have completed.
+ </DD>
+
+ <DT>warm_start (optional)</DT>
+ <DD>BOOLEAN, default: FALSE.
+ Initialize weights with the coefficients
+ from the last call to the fit
+ function. If set to TRUE, weights will be
+ initialized from the model table
+ generated by the previous training run.
+
+ @note
+ The warm start feature works based on the name of the
+ model output table from a previous training run.
+ When using warm start, do not drop the model output table
+ or the model output summary table
+ before calling the fit function, since these are needed to obtain the
+ weights from the previous run.
+ If you are not using warm start, the model output table
+ and the model output table summary must be dropped in
+ the usual way before calling the training function.
+ </DD>
+
+ <DT>name (optional)</DT>
+ <DD>TEXT, default: NULL.
+ Free text string to identify a name, if desired.
+ </DD>
+
+ <DT>description (optional)</DT>
+ <DD>TEXT, default: NULL.
+ Free text string to provide a description, if desired.
+ </DD>
+</dl>
+
+<b>Output tables</b>
+<br>
+ The model output table produced by fit contains the following columns.
+ There is one row per model as per the rows in the 'model_selection_table':
+ <table class="output">
+ <tr>
+ <th>mst_key</th>
+ <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters,
+ as defined in the 'model_selection_table'.</td>
+ </tr>
+ <tr>
+ <th>model_weights</th>
+ <td>BYTEA8. Byte array containing the weights of the neural net.</td>
+ </tr>
+ <tr>
+ <th>model_arch</th>
+ <td>TEXT. A JSON representation of the model architecture
+ used in training.</td>
+ </tr>
+ </table>
+
+ An info table named \<model_output_table\>_info is also created, which has the following columns.
+ There is one row per model as per the rows in the 'model_selection_table':
+ <table class="output">
+ <tr>
+ <th>mst_key</th>
+ <td>INTEGER. ID that defines a unique tuple for model architecture-compile parameters-fit parameters,
+ as defined in the 'model_selection_table'.</td>
+ </tr>
+ <tr>
+ <th>model_id</th>
+ <td>INTEGER. ID that defines model in the 'model_arch_table'.</td>
+ </tr>
+ <tr>
+ <th>compile_params</th>
+ <td>Compile parameters passed to Keras.</td>
+ </tr>
+ <tr>
+ <th>fit_params</th>
+ <td>Fit parameters passed to Keras.</td>
+ </tr>
+ <tr>
+ <th>model_type</th>
+ <td>General identifier for type of model trained.
+ Currently says 'madlib_keras'.</td>
+ </tr>
+ <tr>
+ <th>model_size</th>
+ <td>Size of the model in KB. Models are stored in
+ 'bytea' data format which is used for binary strings
+ in PostgreSQL type databases.</td>
+ </tr>
+ <tr>
+ <th>metrics_elapsed_time</th>
+ <td> Array of elapsed time for metric computations as
+ per the 'metrics_compute_frequency' parameter.
+ Useful for drawing a curve showing loss, accuracy or
+ other metrics as a function of time.
+ For example, if 'metrics_compute_frequency=5'
+ this would be an array of elapsed time for every 5th
+ iteration, plus the last iteration.</td>
+ </tr>
+ <tr>
+ <th>metrics_type</th>
+ <td>Metric specified in the 'compile_params'.</td>
+ </tr>
+ <tr>
+ <th>training_metrics_final</th>
+ <td>Final value of the training
+ metric after all iterations have completed.
+ The metric reported is the one
+ specified in the 'metrics_type' parameter.</td>
+ </tr>
+ <tr>
+ <th>training_loss_final</th>
+ <td>Final value of the training loss after all
+ iterations have completed.</td>
+ </tr>
+ <tr>
+ <th>training_metrics</th>
+ <td>Array of training metrics as
+ per the 'metrics_compute_frequency' parameter.
+ For example, if 'metrics_compute_frequency=5'
+ this would be an array of metrics for every 5th
+ iteration, plus the last iteration.</td>
+ </tr>
+ <tr>
+ <th>training_loss</th>
+ <td>Array of training losses as
+ per the 'metrics_compute_frequency' parameter.
+ For example, if 'metrics_compute_frequency=5'
+ this would be an array of losses for every 5th
+ iteration, plus the last iteration.</td>
+ </tr>
+ <tr>
+ <th>validation_metrics_final</th>
+ <td>Final value of the validation
+ metric after all iterations have completed.
+ The metric reported is the one
+ specified in the 'metrics_type' parameter.</td>
+ </tr>
+ <tr>
+ <th>validation_loss_final</th>
+ <td>Final value of the validation loss after all
+ iterations have completed.</td>
+ </tr>
+ <tr>
+ <th>validation_metrics</th>
+ <td>Array of validation metrics as
+ per the 'metrics_compute_frequency' parameter.
+ For example, if 'metrics_compute_frequency=5'
+ this would be an array of metrics for every 5th
+ iteration, plus the last iteration.</td>
+ </tr>
+ <tr>
+ <th>validation_loss</th>
+ <td>Array of validation losses as
+ per the 'metrics_compute_frequency' parameter.
+ For example, if 'metrics_compute_frequency=5'
+ this would be an array of losses for every 5th
+ iteration, plus the last iteration.</td>
+ </tr>
+
+ </table>
+
+ A summary table named \<model_output_table\>_summary is also created, which has the following columns:
+ <table class="output">
+ <tr>
+ <th>source_table</th>
+ <td>Source table used for training.</td>
+ </tr>
+ <tr>
+ <th>validation_table</th>
+ <td>Name of the table containing
+ the validation dataset (if specified).</td>
+ </tr>
+ <tr>
+ <th>model</th>
+ <td>Name of the output table containing
+ the model for each model selection tuple.</td>
+ </tr>
+ <tr>
+ <th>model_info</th>
+ <td>Name of the output table containing
+ the model performance and other info for
+ each model selection tuple.</td>
+ </tr>
+ <tr>
+ <th>dependent_varname</th>
+ <td>Dependent variable column from the original
+ source table in the image preprocessing step.</td>
+ </tr>
+ <tr>
+ <th>independent_varname</th>
+ <td>Independent variables column from the original
+ source table in the image preprocessing step.</td>
+ </tr>
+ <tr>
+ <th>model_arch_table</th>
+ <td>Name of the table containing
+ the model architecture and (optionally) the
+ initial model weights.</td>
+ </tr>
+ <tr>
+ <th>num_iterations</th>
+ <td>Number of iterations of training completed.</td>
+ </tr>
+ <tr>
+ <th>metrics_compute_frequency</th>
+ <td>Frequency that per-iteration metrics are computed
+ for the training dataset and validation
+ datasets.</td>
+ </tr>
+ <tr>
+ <th>warm_start</th>
+ <td>Indicates whether warm start was used or not.</td>
+ </tr>
+ <tr>
+ <th>name</th>
+ <td>Name of the training run (free text).</td>
+ </tr>
+ <tr>
+ <th>description</th>
+ <td>Description of the training run (free text).</td>
+ </tr>
+ <tr>
+ <th>start_training_time</th>
+ <td>Timestamp for start of training.</td>
+ </tr>
+ <tr>
+ <th>end_training_time</th>
+ <td>Timestamp for end of training.</td>
+ </tr>
+ <tr>
+ <th>madlib_version</th>
+ <td>Version of MADlib used.</td>
+ </tr>
+ <tr>
+ <th>num_classes</th>
+ <td>Count of distinct class values used.</td>
+ </tr>
+ <tr>
+ <th>class_values</th>
+ <td>Array of actual class values used.</td>
+ </tr>
+ <tr>
+ <th>dependent_vartype</th>
+ <td>Data type of the dependent variable.</td>
+ </tr>
+ <tr>
+ <th>normalizing_const</th>
+ <td>Normalizing constant used from the
+ image preprocessing step.</td>
+ </tr>
+ <tr>
+ <th>metrics_iters</th>
+ <td>Array indicating the iterations for which
+ metrics are calculated, as derived from the
+ parameters 'num_iterations' and 'metrics_compute_frequency'.
+ For example, if 'num_iterations=5'
+ and 'metrics_compute_frequency=2', then 'metrics_iters' value
+ would be {2,4,5} indicating that metrics were computed
+ at iterations 2, 4 and 5 (at the end).
+ If 'num_iterations=5'
+ and 'metrics_compute_frequency=1', then 'metrics_iters' value
+ would be {1,2,3,4,5} indicating that metrics were computed
+ at every iteration.</td>
+ </tr>
+ </table>
+
+@anchor keras_evaluate
+@par Evaluate
+The evaluation function has the following format:
+
+<pre class="syntax">
+madlib_keras_evaluate(
+ model_table,
+ test_table,
+ output_table,
+ use_gpus
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+
+<DT>model_table</DT>
+ <DD>TEXT. Name of the table containing the model
+ to use for validation.
+ </DD>
+
+ <DT>test_table</DT>
+ <dd>TEXT. Name of the table containing the evaluation dataset.
+ Note that test/validation data must be preprocessed in the same
+ way as the training dataset, so
+ this is the name of the output
+ table from the image preprocessor. Independent
+ and dependent variables are specified in the preprocessor
+ step which is why you do not need to explicitly state
+ them here as part of the evaluate function.</dd>
+
+ <DT>output_table</DT>
+ <DD>TEXT. Name of table that validation output will be
+ written to. Table contains:</DD>
+ <table class="output">
+ <tr>
+ <th>loss</th>
+ <td>Loss value on evaluation dataset.</td>
+ </tr>
+ <tr>
+ <th>metric</th>
+ <td>Metric value on evaluation dataset, where 'metrics_type'
+ below identifies the type of metric.</td>
+ </tr>
+ <tr>
+ <th>metrics_type</th>
+ <td>Type of metric that was used in the training step.</td>
+ </tr>
+ </table>
+
+ <DT>use_gpus (optional)</DT>
+ <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines whether GPUs
+ are to be used for evaluating the neural network. Set to TRUE to use GPUs.
+
+ @note
+ This parameter must not conflict with how the distribution rules are set in
+ the preprocessor function. For example, if you set a distribution rule to use
+ certain segments on hosts that do not have GPUs attached, you will get an error
+ if you set 'use_gpus' to TRUE. Also, we have seen some memory related issues
+ when segments share GPU resources.
+ For example, if you have 1 GPU per segment host and your cluster has 4
+ segments per segment host, it means that all 4
+ segments will share the same
+ GPU on each host. The current recommended
+ configuration is 1 GPU per segment.
+ </DD>
+</DL>
+
+@anchor keras_predict
+@par Predict
+The prediction function has the following format:
+<pre class="syntax">
+madlib_keras_predict(
+ model_table,
+ test_table,
+ id_col,
+ independent_varname,
+ output_table,
+ pred_type,
+ use_gpus
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+
+<DT>model_table</DT>
+ <DD>TEXT. Name of the table containing the model
+ to use for prediction.
+ </DD>
+
+ <DT>test_table</DT>
+ <DD>TEXT. Name of the table containing the dataset to
+ predict on. Note that test data is not preprocessed (unlike
+ fit and evaluate) so put one test image per row for prediction.
+ Also see the comment below for the 'independent_varname' parameter
+ regarding normalization.
+
+ </DD>
+
+ <DT>id_col</DT>
+ <DD>TEXT. Name of the id column in the test data table.
+ </DD>
+
+ <DT>independent_varname</DT>
+ <DD>TEXT. Column with independent variables in the test table.
+ If a 'normalizing_const' is specified when preprocessing the
+ training dataset, this same normalization will be applied to
+ the independent variables used in predict.
+ </DD>
+
+ <DT>output_table</DT>
+ <DD>TEXT. Name of the table that prediction output will be
+ written to. Table contains:</DD>
+ <table class="output">
+ <tr>
+ <th>id</th>
+ <td>Gives the 'id' for each prediction, corresponding to each row from the test_table.</td>
+ </tr>
+ <tr>
+ <th>estimated_COL_NAME</th>
+ <td>
+ (For pred_type='response') The estimated class
+ for classification, where
+ COL_NAME is the name of the column to be
+ predicted from test data.
+ </td>
+ </tr>
+ <tr>
+ <th>prob_CLASS</th>
+ <td>
+ (For pred_type='prob' for classification) The
+ probability of a given class.
+ There will be one column for each class
+ in the training data.
+ </td>
+ </tr>
+ </table>
+
+ <DT>pred_type (optional)</DT>
+ <DD>TEXT, default: 'response'. The type of output
+ desired, where 'response' gives the actual prediction
+ and 'prob' gives the probability value for each class.
+ </DD>
+
+ <DT>use_gpus (optional)</DT>
+ <DD>BOOLEAN, default: FALSE (i.e., CPU). Determines
+ whether GPUs are to be used for prediction/inference.
+ Set to TRUE to use GPUs.
+
+ @note
+ The prediction function uses the whole cluster. If you are using GPUs, it
+ requires that GPUs are attached to all hosts, and that there are the same number
+ of GPUs on each host (homogeneous cluster). This is different from the fit()
+ and evaluate() functions that support GPUs on only some of the hosts (heterogeneous cluster).
+ Therefore, if you have GPUs only on some of the hosts, or an uneven number of GPUs per host, then
+ set this parameter to FALSE to use CPUs.
+ </DD>
+</DL>
+
+@anchor example
+@par Examples
+
+@note
+Deep learning works best on very large datasets,
+but that is not convenient for a quick introduction
+to the syntax. So in this example we use an MLP on the well
+known iris data set from https://archive.ics.uci.edu/ml/datasets/iris.
+For more realistic examples with images please refer
+to the deep learning notebooks
+at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts.
+
+<h4>Classification</h4>
+
+-# Create an input data set.
+<pre class="example">
+DROP TABLE IF EXISTS iris_data;
+CREATE TABLE iris_data(
+ id serial,
+ attributes numeric[],
+ class_text varchar
+);
+INSERT INTO iris_data(id, attributes, class_text) VALUES
+(1,ARRAY[5.1,3.5,1.4,0.2],'Iris-setosa'),
+(2,ARRAY[4.9,3.0,1.4,0.2],'Iris-setosa'),
+(3,ARRAY[4.7,3.2,1.3,0.2],'Iris-setosa'),
+(4,ARRAY[4.6,3.1,1.5,0.2],'Iris-setosa'),
+(5,ARRAY[5.0,3.6,1.4,0.2],'Iris-setosa'),
+(6,ARRAY[5.4,3.9,1.7,0.4],'Iris-setosa'),
+(7,ARRAY[4.6,3.4,1.4,0.3],'Iris-setosa'),
+(8,ARRAY[5.0,3.4,1.5,0.2],'Iris-setosa'),
+(9,ARRAY[4.4,2.9,1.4,0.2],'Iris-setosa'),
+(10,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
+(11,ARRAY[5.4,3.7,1.5,0.2],'Iris-setosa'),
+(12,ARRAY[4.8,3.4,1.6,0.2],'Iris-setosa'),
+(13,ARRAY[4.8,3.0,1.4,0.1],'Iris-setosa'),
+(14,ARRAY[4.3,3.0,1.1,0.1],'Iris-setosa'),
+(15,ARRAY[5.8,4.0,1.2,0.2],'Iris-setosa'),
+(16,ARRAY[5.7,4.4,1.5,0.4],'Iris-setosa'),
+(17,ARRAY[5.4,3.9,1.3,0.4],'Iris-setosa'),
+(18,ARRAY[5.1,3.5,1.4,0.3],'Iris-setosa'),
+(19,ARRAY[5.7,3.8,1.7,0.3],'Iris-setosa'),
+(20,ARRAY[5.1,3.8,1.5,0.3],'Iris-setosa'),
+(21,ARRAY[5.4,3.4,1.7,0.2],'Iris-setosa'),
+(22,ARRAY[5.1,3.7,1.5,0.4],'Iris-setosa'),
+(23,ARRAY[4.6,3.6,1.0,0.2],'Iris-setosa'),
+(24,ARRAY[5.1,3.3,1.7,0.5],'Iris-setosa'),
+(25,ARRAY[4.8,3.4,1.9,0.2],'Iris-setosa'),
+(26,ARRAY[5.0,3.0,1.6,0.2],'Iris-setosa'),
+(27,ARRAY[5.0,3.4,1.6,0.4],'Iris-setosa'),
+(28,ARRAY[5.2,3.5,1.5,0.2],'Iris-setosa'),
+(29,ARRAY[5.2,3.4,1.4,0.2],'Iris-setosa'),
+(30,ARRAY[4.7,3.2,1.6,0.2],'Iris-setosa'),
+(31,ARRAY[4.8,3.1,1.6,0.2],'Iris-setosa'),
+(32,ARRAY[5.4,3.4,1.5,0.4],'Iris-setosa'),
+(33,ARRAY[5.2,4.1,1.5,0.1],'Iris-setosa'),
+(34,ARRAY[5.5,4.2,1.4,0.2],'Iris-setosa'),
+(35,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
+(36,ARRAY[5.0,3.2,1.2,0.2],'Iris-setosa'),
+(37,ARRAY[5.5,3.5,1.3,0.2],'Iris-setosa'),
+(38,ARRAY[4.9,3.1,1.5,0.1],'Iris-setosa'),
+(39,ARRAY[4.4,3.0,1.3,0.2],'Iris-setosa'),
+(40,ARRAY[5.1,3.4,1.5,0.2],'Iris-setosa'),
+(41,ARRAY[5.0,3.5,1.3,0.3],'Iris-setosa'),
+(42,ARRAY[4.5,2.3,1.3,0.3],'Iris-setosa'),
+(43,ARRAY[4.4,3.2,1.3,0.2],'Iris-setosa'),
+(44,ARRAY[5.0,3.5,1.6,0.6],'Iris-setosa'),
+(45,ARRAY[5.1,3.8,1.9,0.4],'Iris-setosa'),
+(46,ARRAY[4.8,3.0,1.4,0.3],'Iris-setosa'),
+(47,ARRAY[5.1,3.8,1.6,0.2],'Iris-setosa'),
+(48,ARRAY[4.6,3.2,1.4,0.2],'Iris-setosa'),
+(49,ARRAY[5.3,3.7,1.5,0.2],'Iris-setosa'),
+(50,ARRAY[5.0,3.3,1.4,0.2],'Iris-setosa'),
+(51,ARRAY[7.0,3.2,4.7,1.4],'Iris-versicolor'),
+(52,ARRAY[6.4,3.2,4.5,1.5],'Iris-versicolor'),
+(53,ARRAY[6.9,3.1,4.9,1.5],'Iris-versicolor'),
+(54,ARRAY[5.5,2.3,4.0,1.3],'Iris-versicolor'),
+(55,ARRAY[6.5,2.8,4.6,1.5],'Iris-versicolor'),
+(56,ARRAY[5.7,2.8,4.5,1.3],'Iris-versicolor'),
+(57,ARRAY[6.3,3.3,4.7,1.6],'Iris-versicolor'),
+(58,ARRAY[4.9,2.4,3.3,1.0],'Iris-versicolor'),
+(59,ARRAY[6.6,2.9,4.6,1.3],'Iris-versicolor'),
+(60,ARRAY[5.2,2.7,3.9,1.4],'Iris-versicolor'),
+(61,ARRAY[5.0,2.0,3.5,1.0],'Iris-versicolor'),
+(62,ARRAY[5.9,3.0,4.2,1.5],'Iris-versicolor'),
+(63,ARRAY[6.0,2.2,4.0,1.0],'Iris-versicolor'),
+(64,ARRAY[6.1,2.9,4.7,1.4],'Iris-versicolor'),
+(65,ARRAY[5.6,2.9,3.6,1.3],'Iris-versicolor'),
+(66,ARRAY[6.7,3.1,4.4,1.4],'Iris-versicolor'),
+(67,ARRAY[5.6,3.0,4.5,1.5],'Iris-versicolor'),
+(68,ARRAY[5.8,2.7,4.1,1.0],'Iris-versicolor'),
+(69,ARRAY[6.2,2.2,4.5,1.5],'Iris-versicolor'),
+(70,ARRAY[5.6,2.5,3.9,1.1],'Iris-versicolor'),
+(71,ARRAY[5.9,3.2,4.8,1.8],'Iris-versicolor'),
+(72,ARRAY[6.1,2.8,4.0,1.3],'Iris-versicolor'),
+(73,ARRAY[6.3,2.5,4.9,1.5],'Iris-versicolor'),
+(74,ARRAY[6.1,2.8,4.7,1.2],'Iris-versicolor'),
+(75,ARRAY[6.4,2.9,4.3,1.3],'Iris-versicolor'),
+(76,ARRAY[6.6,3.0,4.4,1.4],'Iris-versicolor'),
+(77,ARRAY[6.8,2.8,4.8,1.4],'Iris-versicolor'),
+(78,ARRAY[6.7,3.0,5.0,1.7],'Iris-versicolor'),
+(79,ARRAY[6.0,2.9,4.5,1.5],'Iris-versicolor'),
+(80,ARRAY[5.7,2.6,3.5,1.0],'Iris-versicolor'),
+(81,ARRAY[5.5,2.4,3.8,1.1],'Iris-versicolor'),
+(82,ARRAY[5.5,2.4,3.7,1.0],'Iris-versicolor'),
+(83,ARRAY[5.8,2.7,3.9,1.2],'Iris-versicolor'),
+(84,ARRAY[6.0,2.7,5.1,1.6],'Iris-versicolor'),
+(85,ARRAY[5.4,3.0,4.5,1.5],'Iris-versicolor'),
+(86,ARRAY[6.0,3.4,4.5,1.6],'Iris-versicolor'),
+(87,ARRAY[6.7,3.1,4.7,1.5],'Iris-versicolor'),
+(88,ARRAY[6.3,2.3,4.4,1.3],'Iris-versicolor'),
+(89,ARRAY[5.6,3.0,4.1,1.3],'Iris-versicolor'),
+(90,ARRAY[5.5,2.5,4.0,1.3],'Iris-versicolor'),
+(91,ARRAY[5.5,2.6,4.4,1.2],'Iris-versicolor'),
+(92,ARRAY[6.1,3.0,4.6,1.4],'Iris-versicolor'),
+(93,ARRAY[5.8,2.6,4.0,1.2],'Iris-versicolor'),
+(94,ARRAY[5.0,2.3,3.3,1.0],'Iris-versicolor'),
+(95,ARRAY[5.6,2.7,4.2,1.3],'Iris-versicolor'),
+(96,ARRAY[5.7,3.0,4.2,1.2],'Iris-versicolor'),
+(97,ARRAY[5.7,2.9,4.2,1.3],'Iris-versicolor'),
+(98,ARRAY[6.2,2.9,4.3,1.3],'Iris-versicolor'),
+(99,ARRAY[5.1,2.5,3.0,1.1],'Iris-versicolor'),
+(100,ARRAY[5.7,2.8,4.1,1.3],'Iris-versicolor'),
+(101,ARRAY[6.3,3.3,6.0,2.5],'Iris-virginica'),
+(102,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'),
+(103,ARRAY[7.1,3.0,5.9,2.1],'Iris-virginica'),
+(104,ARRAY[6.3,2.9,5.6,1.8],'Iris-virginica'),
+(105,ARRAY[6.5,3.0,5.8,2.2],'Iris-virginica'),
+(106,ARRAY[7.6,3.0,6.6,2.1],'Iris-virginica'),
+(107,ARRAY[4.9,2.5,4.5,1.7],'Iris-virginica'),
+(108,ARRAY[7.3,2.9,6.3,1.8],'Iris-virginica'),
+(109,ARRAY[6.7,2.5,5.8,1.8],'Iris-virginica'),
+(110,ARRAY[7.2,3.6,6.1,2.5],'Iris-virginica'),
+(111,ARRAY[6.5,3.2,5.1,2.0],'Iris-virginica'),
+(112,ARRAY[6.4,2.7,5.3,1.9],'Iris-virginica'),
+(113,ARRAY[6.8,3.0,5.5,2.1],'Iris-virginica'),
+(114,ARRAY[5.7,2.5,5.0,2.0],'Iris-virginica'),
+(115,ARRAY[5.8,2.8,5.1,2.4],'Iris-virginica'),
+(116,ARRAY[6.4,3.2,5.3,2.3],'Iris-virginica'),
+(117,ARRAY[6.5,3.0,5.5,1.8],'Iris-virginica'),
+(118,ARRAY[7.7,3.8,6.7,2.2],'Iris-virginica'),
+(119,ARRAY[7.7,2.6,6.9,2.3],'Iris-virginica'),
+(120,ARRAY[6.0,2.2,5.0,1.5],'Iris-virginica'),
+(121,ARRAY[6.9,3.2,5.7,2.3],'Iris-virginica'),
+(122,ARRAY[5.6,2.8,4.9,2.0],'Iris-virginica'),
+(123,ARRAY[7.7,2.8,6.7,2.0],'Iris-virginica'),
+(124,ARRAY[6.3,2.7,4.9,1.8],'Iris-virginica'),
+(125,ARRAY[6.7,3.3,5.7,2.1],'Iris-virginica'),
+(126,ARRAY[7.2,3.2,6.0,1.8],'Iris-virginica'),
+(127,ARRAY[6.2,2.8,4.8,1.8],'Iris-virginica'),
+(128,ARRAY[6.1,3.0,4.9,1.8],'Iris-virginica'),
+(129,ARRAY[6.4,2.8,5.6,2.1],'Iris-virginica'),
+(130,ARRAY[7.2,3.0,5.8,1.6],'Iris-virginica'),
+(131,ARRAY[7.4,2.8,6.1,1.9],'Iris-virginica'),
+(132,ARRAY[7.9,3.8,6.4,2.0],'Iris-virginica'),
+(133,ARRAY[6.4,2.8,5.6,2.2],'Iris-virginica'),
+(134,ARRAY[6.3,2.8,5.1,1.5],'Iris-virginica'),
+(135,ARRAY[6.1,2.6,5.6,1.4],'Iris-virginica'),
+(136,ARRAY[7.7,3.0,6.1,2.3],'Iris-virginica'),
+(137,ARRAY[6.3,3.4,5.6,2.4],'Iris-virginica'),
+(138,ARRAY[6.4,3.1,5.5,1.8],'Iris-virginica'),
+(139,ARRAY[6.0,3.0,4.8,1.8],'Iris-virginica'),
+(140,ARRAY[6.9,3.1,5.4,2.1],'Iris-virginica'),
+(141,ARRAY[6.7,3.1,5.6,2.4],'Iris-virginica'),
+(142,ARRAY[6.9,3.1,5.1,2.3],'Iris-virginica'),
+(143,ARRAY[5.8,2.7,5.1,1.9],'Iris-virginica'),
+(144,ARRAY[6.8,3.2,5.9,2.3],'Iris-virginica'),
+(145,ARRAY[6.7,3.3,5.7,2.5],'Iris-virginica'),
+(146,ARRAY[6.7,3.0,5.2,2.3],'Iris-virginica'),
+(147,ARRAY[6.3,2.5,5.0,1.9],'Iris-virginica'),
+(148,ARRAY[6.5,3.0,5.2,2.0],'Iris-virginica'),
+(149,ARRAY[6.2,3.4,5.4,2.3],'Iris-virginica'),
+(150,ARRAY[5.9,3.0,5.1,1.8],'Iris-virginica');
+</pre>
+Create a test/validation dataset from the training data:
+<pre class="example">
+DROP TABLE IF EXISTS iris_train, iris_test;
+-- Set seed so results are reproducible
+SELECT setseed(0);
+SELECT madlib.train_test_split('iris_data', -- Source table
+ 'iris', -- Output table root name
+ 0.8, -- Train proportion
+ NULL, -- Test proportion (0.2)
+ NULL, -- Strata definition
+ NULL, -- Output all columns
+ NULL, -- Sample without replacement
+ TRUE -- Separate output tables
+ );
+SELECT COUNT(*) FROM iris_train;
+</pre>
+<pre class="result">
+ count
+------+
+ 120
+</pre>
+
+-# Call the preprocessor for deep learning. For the training dataset:
+<pre class="example">
+\\x on
+DROP TABLE IF EXISTS iris_train_packed, iris_train_packed_summary;
+SELECT madlib.training_preprocessor_dl('iris_train', -- Source table
+ 'iris_train_packed', -- Output table
+ 'class_text', -- Dependent variable
+ 'attributes' -- Independent variable
+ );
+SELECT * FROM iris_train_packed_summary;
+</pre>
+<pre class="result">
+-[ RECORD 1 ]-------+---------------------------------------------
+source_table | iris_train
+output_table | iris_train_packed
+dependent_varname | class_text
+independent_varname | attributes
+dependent_vartype | character varying
+class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
+buffer_size | 60
+normalizing_const | 1.0
+num_classes | 3
+</pre>
+For the validation dataset:
+<pre class="example">
+DROP TABLE IF EXISTS iris_test_packed, iris_test_packed_summary;
+SELECT madlib.validation_preprocessor_dl('iris_test', -- Source table
+ 'iris_test_packed', -- Output table
+ 'class_text', -- Dependent variable
+ 'attributes', -- Independent variable
+ 'iris_train_packed' -- From training preprocessor step
+ );
+SELECT * FROM iris_test_packed_summary;
+</pre>
+<pre class="result">
+-[ RECORD 1 ]-------+---------------------------------------------
+source_table | iris_test
+output_table | iris_test_packed
+dependent_varname | class_text
+independent_varname | attributes
+dependent_vartype | character varying
+class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
+buffer_size | 15
+normalizing_const | 1.0
+num_classes | 3
+</pre>
+
+-# Define and load model architecture. Use Keras to define
+the model architecture with 1 hidden layer:
+<pre class="example">
+import keras
+from keras.models import Sequential
+from keras.layers import Dense
+model1 = Sequential()
+model1.add(Dense(10, activation='relu', input_shape=(4,)))
+model1.add(Dense(10, activation='relu'))
+model1.add(Dense(3, activation='softmax'))
+model1.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type) Output Shape Param #
+=================================================================
+dense_1 (Dense) (None, 10) 50
+_________________________________________________________________
+dense_2 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_3 (Dense) (None, 3) 33
+=================================================================
+Total params: 193
+Trainable params: 193
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model1.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+Define model architecture with 2 hidden layers:
+<pre class="example">
+model2 = Sequential()
+model2.add(Dense(10, activation='relu', input_shape=(4,)))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(3, activation='softmax'))
+model2.summary()
+\verbatim
+
+Layer (type) Output Shape Param #
+=================================================================
+dense_4 (Dense) (None, 10) 50
+_________________________________________________________________
+dense_5 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_6 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_7 (Dense) (None, 3) 33
+=================================================================
+Total params: 303
+Trainable params: 303
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model2.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+Load into model architecture table:
+<pre class="example">
+DROP TABLE IF EXISTS model_arch_library;
+SELECT madlib.load_keras_model('model_arch_library', -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json, -- JSON blob
+ NULL, -- Weights
+ 'Sophie', -- Name
+ 'MLP with 1 hidden layer' -- Descr
+);
+SELECT madlib.load_keras_model('model_arch_library', -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json, -- JSON blob
+ NULL, -- Weights
+ 'Maria', -- Name
+ 'MLP with 2 hidden layers' -- Descr
+);
+</pre>
+-# Define model selection tuples and load. Select the model(s) from the model architecture
+table that you want to run, along with the compile and fit parameters. All combinations
+of the specified model selection parameters will be created and loaded:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table, mst_table_summary;
+SELECT madlib.load_model_selection_table('model_arch_library', -- model architecture table
+ 'mst_table', -- model selection table output
+ ARRAY[1,2], -- model ids from model architecture table
+ ARRAY[ -- compile params
+ $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$,
+ $$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy']$$,
+ $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$
+ ],
+ ARRAY[ -- fit params
+ $$batch_size=4,epochs=1$$,
+ $$batch_size=8,epochs=1$$
+ ]
+ );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_id | compile_params | fit_params
+---------+----------+---------------------------------------------------------------------------------+-----------------------
+ 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(12 rows)
+</pre>
+This is the name of the model architecture table that corresponds to the model selection table:
+<pre class="example">
+SELECT * FROM mst_table_summary;
+</pre>
+<pre class="result">
+ model_arch_table
+--------------------+
+ model_arch_library
+</pre>
+
+-# Train multiple models.
+<pre class="example">
+DROP TABLE IF EXISTS iris_multi_model, iris_multi_model_summary, iris_multi_model_info;
+SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table
+ 'iris_multi_model', -- model_output_table
+ 'mst_table', -- model_selection_table
+ 10, -- num_iterations
+ FALSE -- use gpus
+ );
+</pre>
+View the model summary:
+<pre class="example">
+SELECT * FROM iris_multi_model_summary;
+</pre>
+<pre class="result">
+source_table | iris_train_packed
+validation_table |
+model | iris_multi_model
+model_info | iris_multi_model_info
+dependent_varname | class_text
+independent_varname | attributes
+model_arch_table | model_arch_library
+num_iterations | 10
+metrics_compute_frequency | 10
+warm_start | f
+name |
+description |
+start_training_time | 2019-12-16 18:54:33.826414
+end_training_time | 2019-12-16 18:56:19.106321
+madlib_version | 1.17-dev
+num_classes | 3
+class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
+dependent_vartype | character varying
+normalizing_const | 1
+metrics_iters | {10}
+</pre>
+View results for each model:
+<pre class="example">
+SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final;
+</pre>
+<pre class="result">
+ mst_key | model_id | compile_params | fit_params | model_type | model_size | metrics_elapsed_time | metrics_type | training_metrics_final | training_loss_final | training_metrics | training_loss | validation_metrics_final | validation_loss_final | validation_metrics | validation_loss
+---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+--------------+----------------------+--------------+------------------------+---------------------+---------------------+---------------------+--------------------------+-----------------------+--------------------+-----------------
+ 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.189763069152832} | {accuracy} | 0.983333349228 | 0.102392569184 | {0.983333349227905} | {0.102392569184303} | | | |
+ 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.170287847518921} | {accuracy} | 0.975000023842 | 0.159002527595 | {0.975000023841858} | {0.159002527594566} | | | |
+ 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.165465116500854} | {accuracy} | 0.966666638851 | 0.10245500505 | {0.966666638851166} | {0.102455005049706} | | | |
+ 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.199872970581055} | {accuracy} | 0.941666662693 | 0.12242924422 | {0.941666662693024} | {0.122429244220257} | | | |
+ 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.16815185546875} | {accuracy} | 0.883333325386 | 0.437314987183 | {0.883333325386047} | {0.437314987182617} | | | |
+ 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.430488109588623} | {accuracy} | 0.858333349228 | 0.400548309088 | {0.858333349227905} | {0.400548309087753} | | | |
+ 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.154508113861084} | {accuracy} | 0.683333337307 | 0.634458899498 | {0.683333337306976} | {0.634458899497986} | | | |
+ 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.192011833190918} | {accuracy} | 0.683333337307 | 0.792817175388 | {0.683333337306976} | {0.792817175388336} | | | |
+ 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.241909980773926} | {accuracy} | 0.641666650772 | 0.736794412136 | {0.641666650772095} | {0.736794412136078} | | | |
+ 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.186789035797119} | {accuracy} | 0.358333319426 | 1.09771859646 | {0.358333319425583} | {1.09771859645844} | | | |
+ 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.186538934707642} | {accuracy} | 0.358333319426 | 1.1002266407 | {0.358333319425583} | {1.10022664070129} | | | |
+ 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.159209012985229} | {accuracy} | 0.324999988079 | 10.8797140121 | {0.324999988079071} | {10.879714012146} | | | |
+(12 rows)
+</pre>
+
+-# Evaluate. Now run evaluate using models we built above:
+<pre class="example">
+DROP TABLE IF EXISTS iris_validate;
+SELECT madlib.madlib_keras_evaluate('iris_multi_model', -- model
+ 'iris_test_packed', -- test table
+ 'iris_validate', -- output table
+ NULL, -- use gpus
+ 3 -- mst_key to use
+ );
+SELECT * FROM iris_validate;
+</pre>
+<pre class="result">
+ loss | metric | metrics_type
+-------------------+-------------------+--------------
+ 0.103803977370262 | 0.966666638851166 | {accuracy}
+</pre>
+
+-# Predict. Now predict using one of the models we built. We will use the validation data set
+for prediction as well, which is not usual but serves to show the syntax.
+The prediction is in the estimated_class_text column:
+<pre class="example">
+DROP TABLE IF EXISTS iris_predict;
+SELECT madlib.madlib_keras_predict('iris_multi_model', -- model
+ 'iris_test', -- test_table
+ 'id', -- id column
+ 'attributes', -- independent var
+ 'iris_predict', -- output table
+ 'response', -- prediction type
+ FALSE, -- use gpus
+ 3 -- mst_key to use
+ );
+SELECT * FROM iris_predict ORDER BY id;
+</pre>
+<pre class="result">
+ id | estimated_class_text
+-----+----------------------
+ 9 | Iris-setosa
+ 18 | Iris-setosa
+ 22 | Iris-setosa
+ 26 | Iris-setosa
+ 35 | Iris-setosa
+ 38 | Iris-setosa
+ 42 | Iris-setosa
+ 43 | Iris-setosa
+ 45 | Iris-setosa
+ 46 | Iris-setosa
+ 50 | Iris-setosa
+ 53 | Iris-versicolor
+ 60 | Iris-versicolor
+ 68 | Iris-versicolor
+ 77 | Iris-versicolor
+ 78 | Iris-versicolor
+ 79 | Iris-versicolor
+ 81 | Iris-versicolor
+ 82 | Iris-versicolor
+ 85 | Iris-virginica
+ 95 | Iris-versicolor
+ 97 | Iris-versicolor
+ 98 | Iris-versicolor
+ 113 | Iris-virginica
+ 117 | Iris-virginica
+ 118 | Iris-virginica
+ 127 | Iris-virginica
+ 136 | Iris-virginica
+ 143 | Iris-virginica
+ 145 | Iris-virginica
+(30 rows)
+</pre>
+Count misclassifications:
+<pre class="example">
+SELECT COUNT(*) FROM iris_predict JOIN iris_test USING (id)
+WHERE iris_predict.estimated_class_text != iris_test.class_text;
+</pre>
+<pre class="result">
+ count
+-------+
+ 1
+</pre>
+Percent correct classifications (test accuracy):
+<pre class="example">
+SELECT round(count(*)*100/(150*0.2),2) as test_accuracy_percent from
+ (select iris_test.class_text as actual, iris_predict.estimated_class_text as estimated
+ from iris_predict inner join iris_test
+ on iris_test.id=iris_predict.id) q
+WHERE q.actual=q.estimated;
+</pre>
+<pre class="result">
+ test_accuracy_percent
+-----------------------+
+ 96.67
+</pre>
+
+<h4>Classification with Other Parameters</h4>
+
+-# Validation dataset. Now use a validation dataset
+and compute metrics every 3rd iteration using
+the 'metrics_compute_frequency' parameter. This can
+help reduce run time if you do not need metrics
+computed at every iteration.
+<pre class="example">
+DROP TABLE IF EXISTS iris_multi_model, iris_multi_model_summary, iris_multi_model_info;
+SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table
+ 'iris_multi_model', -- model_output_table
+ 'mst_table', -- model_selection_table
+ 10, -- num_iterations
+ FALSE, -- use gpus
+ 'iris_test_packed', -- validation dataset
+ 3, -- metrics compute frequency
+ FALSE, -- warm start
+ 'Sophie L.', -- name
+ 'Model selection for iris dataset' -- description
+ );
+</pre>
+View the model summary:
+<pre class="example">
+SELECT * FROM iris_multi_model_summary;
+</pre>
+<pre class="result">
+source_table | iris_train_packed
+validation_table | iris_test_packed
+model | iris_multi_model
+model_info | iris_multi_model_info
+dependent_varname | class_text
+independent_varname | attributes
+model_arch_table | model_arch_library
+num_iterations | 10
+metrics_compute_frequency | 3
+warm_start | f
+name | Sophie L.
+description | Model selection for iris dataset
+start_training_time | 2019-12-16 19:28:16.219137
+end_training_time | 2019-12-16 19:30:19.238692
+madlib_version | 1.17-dev
+num_classes | 3
+class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
+dependent_vartype | character varying
+normalizing_const | 1
+metrics_iters | {3,6,9,10}
+</pre>
+View results for each model:
+<pre class="example">
+SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final;
+</pre>
+<pre class="result">
+ mst_key | model_id | compile_params | fit_params | model_type | model_size | metrics_elapsed_time | metrics_type | training_metrics_final | training_loss_final | training_metrics | training_loss | validation_metrics_final | validation_loss_ [...]
+---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+--------------+---------------------------------------------------------------------------+--------------+------------------------+---------------------+---------------------------------------------------------------------------+----------------------------------------------------------------------------+--------------------------+----------------- [...]
+ 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.19480299949646,0.184041976928711,0.191921949386597,0.24904990196228} | {accuracy} | 0.975000023842 | 0.107221193612 | {0.975000023841858,0.883333325386047,0.858333349227905,0.975000023841858} | {0.348870366811752,0.250929206609726,0.251643180847168,0.107221193611622} | 1 | 0.07454025 [...]
+ 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.168098926544189,0.157669067382812,0.196551084518433,0.159118890762329} | {accuracy} | 0.925000011921 | 0.15261271596 | {0.883333325386047,0.808333337306976,0.958333313465118,0.925000011920929} | {0.323819071054459,0.359202563762665,0.145684525370598,0.152612715959549} | 0.966666638851 | 0.09096869 [...]
+ 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.196630954742432,0.182199001312256,0.186440944671631,0.203809022903442} | {accuracy} | 0.933333337307 | 0.50553715229 | {0.725000023841858,0.758333325386047,0.858333349227905,0.933333337306976} | {0.884528338909149,0.746422350406647,0.588649451732635,0.505537152290344} | 0.933333337307 | 0.5527370 [...]
+ 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.243091106414795,0.183151960372925,0.201867818832397,0.182805061340332} | {accuracy} | 0.891666650772 | 0.208901122212 | {0.850000023841858,0.958333313465118,0.975000023841858,0.891666650772095} | {0.408110946416855,0.144096985459328,0.0838083922863007,0.20890112221241} | 0.866666674614 | 0.2708076 [...]
+ 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.15792989730835,0.171962976455688,0.226946830749512,0.161045074462891} | {accuracy} | 0.850000023842 | 0.480257481337 | {0.616666674613953,0.683333337306976,0.75,0.850000023841858} | {0.948418378829956,0.743716657161713,0.534102499485016,0.480257481336594} | 0.800000011921 | 0.5156010 [...]
+ 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.184230089187622,0.20333194732666,0.191611051559448,0.187481880187988} | {accuracy} | 0.883333325386 | 0.341951817274 | {0.658333361148834,0.941666662693024,0.958333313465118,0.883333325386047} | {0.442611187696457,0.403654724359512,0.0869002863764763,0.341951817274094} | 0.800000011921 | 0.4072133 [...]
+ 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.169033050537109,0.160851001739502,0.172677993774414,0.166024923324585} | {accuracy} | 0.641666650772 | 0.474238961935 | {0.899999976158142,0.949999988079071,0.783333361148834,0.641666650772095} | {0.255419433116913,0.1908118724823,0.767927944660187,0.474238961935043} | 0.766666650772 | 0.4262402 [...]
+ 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.163635015487671,0.217028856277466,0.412152051925659,0.163925886154175} | {accuracy} | 0.841666638851 | 0.351104289293 | {0.866666674613953,0.916666686534882,0.941666662693024,0.841666638851166} | {0.341254085302353,0.210138097405434,0.152032792568207,0.351104289293289} | 0.733333349228 | 0.4624978 [...]
+ 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.160864114761353,0.515794038772583,0.170866966247559,0.162554025650024} | {accuracy} | 0.774999976158 | 0.372270941734 | {0.608333349227905,0.941666662693024,0.774999976158142,0.774999976158142} | {0.541361212730408,0.23515048623085,0.341034829616547,0.372270941734314} | 0.699999988079 | 0.4999965 [...]
+ 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.184638977050781,0.180385828018188,0.206297159194946,0.186070919036865} | {accuracy} | 0.691666662693 | 0.721061587334 | {0.25,0.675000011920929,0.733333349227905,0.691666662693024} | {0.992778599262238,0.875843703746796,0.761732041835785,0.721061587333679} | 0.600000023842 | 0.7364071 [...]
+ 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.154546976089478,0.16751503944397,0.162903070449829,0.158049821853638} | {accuracy} | 0.683333337307 | 0.7804261446 | {0.358333319425583,0.358333319425583,0.675000011920929,0.683333337306976} | {1.20112609863281,0.947584271430969,0.819790959358215,0.780426144599915} | 0.600000023842 | 0.8485705 [...]
+ 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.19166898727417,0.202822923660278,0.192266941070557,0.188740015029907} | {accuracy} | 0.358333319426 | 1.11373853683 | {0.358333319425583,0.358333319425583,0.358333319425583,0.358333319425583} | {1.10469853878021,1.10379803180695,1.1021580696106,1.11373853683472} | 0.233333334327 | 1.165892 [...]
+(12 rows)
+</pre>
+
+-# Predict probabilities for each class:
+<pre class="example">
+DROP TABLE IF EXISTS iris_predict;
+SELECT madlib.madlib_keras_predict('iris_multi_model', -- model
+ 'iris_test', -- test_table
+ 'id', -- id column
+ 'attributes', -- independent var
+ 'iris_predict', -- output table
+ 'prob', -- prediction type
+ FALSE, -- use gpus
+ 3 -- mst_key to use
+ );
+SELECT * FROM iris_predict ORDER BY id;
+</pre>
+<pre class="result">
+ id | prob_Iris-setosa | prob_Iris-versicolor | prob_Iris-virginica
+-----+------------------+----------------------+---------------------
+ 9 | 0.99931216 | 0.00068789057 | 6.2587335e-10
+ 18 | 0.99984336 | 0.00015656587 | 7.969957e-12
+ 22 | 0.9998497 | 0.00015029701 | 6.4133347e-12
+ 26 | 0.9995004 | 0.00049964694 | 2.2795305e-10
+ 35 | 0.99964666 | 0.00035332117 | 9.4490485e-11
+ 38 | 0.99964666 | 0.00035332117 | 9.4490485e-11
+ 42 | 0.9985154 | 0.0014845316 | 5.293262e-09
+ 43 | 0.99964476 | 0.0003552362 | 9.701174e-11
+ 45 | 0.9997311 | 0.00026883607 | 3.076166e-11
+ 46 | 0.9995486 | 0.00045140853 | 1.6814435e-10
+ 50 | 0.9997856 | 0.00021441824 | 2.1316622e-11
+ 53 | 9.837335e-06 | 0.97109175 | 0.028898403
+ 60 | 0.00014028326 | 0.96552837 | 0.034331344
+ 68 | 0.00087942625 | 0.9883348 | 0.010785843
+ 77 | 6.08114e-06 | 0.94356424 | 0.056429718
+ 78 | 7.116364e-07 | 0.8596206 | 0.14037873
+ 79 | 1.3918722e-05 | 0.94052655 | 0.05945957
+ 81 | 0.00045687397 | 0.9794796 | 0.020063542
+ 82 | 0.0015463434 | 0.98768973 | 0.010763981
+ 85 | 1.0929693e-05 | 0.87866926 | 0.121319845
+ 95 | 6.3600986e-05 | 0.95264935 | 0.047287125
+ 97 | 0.00020298029 | 0.981617 | 0.018180028
+ 98 | 0.00019721613 | 0.98902065 | 0.01078211
+ 113 | 1.0388683e-09 | 0.23626474 | 0.7637353
+ 117 | 4.598902e-09 | 0.25669694 | 0.7433031
+ 118 | 3.7139156e-11 | 0.13193987 | 0.8680601
+ 127 | 2.1297862e-07 | 0.670349 | 0.32965073
+ 136 | 7.1760774e-12 | 0.07074605 | 0.929254
+ 143 | 1.2568385e-09 | 0.113820426 | 0.8861796
+ 145 | 6.17019e-11 | 0.117578305 | 0.88242173
+(30 rows)
+</pre>
+
+-# Warm start. Next, use the warm_start parameter
+to continue learning, using the coefficients from
+the run above. Note that we don't drop the
+model table or model summary table:
+<pre class="example">
+SELECT madlib.madlib_keras_fit_multiple_model('iris_train_packed', -- source_table
+ 'iris_multi_model', -- model_output_table
+ 'mst_table', -- model_selection_table
+ 3, -- num_iterations
+ FALSE, -- use gpus
+ 'iris_test_packed', -- validation dataset
+ 1, -- metrics compute frequency
+ TRUE, -- warm start
+ 'Sophie L.', -- name
+ 'Simple MLP for iris dataset' -- description
+ );
+SELECT * FROM iris_multi_model_summary;
+</pre>
+<pre class="result">
+source_table | iris_train_packed
+validation_table | iris_test_packed
+model | iris_multi_model
+model_info | iris_multi_model_info
+dependent_varname | class_text
+independent_varname | attributes
+model_arch_table | model_arch_library
+num_iterations | 3
+metrics_compute_frequency | 1
+warm_start | t
+name | Sophie L.
+description | Simple MLP for iris dataset
+start_training_time | 2019-12-16 20:07:41.488587
+end_training_time | 2019-12-16 20:08:27.20651
+madlib_version | 1.17-dev
+num_classes | 3
+class_values | {Iris-setosa,Iris-versicolor,Iris-virginica}
+dependent_vartype | character varying
+normalizing_const | 1
+metrics_iters | {1,2,3}
+</pre>
+View results for each model:
+<pre class="example">
+SELECT * FROM iris_multi_model_info ORDER BY training_metrics_final DESC, training_loss_final;
+</pre>
+<pre class="result">
+ mst_key | model_id | compile_params | fit_params | model_type | model_size | metrics_elapsed_time | metrics_type | training_metrics_final | training_loss_final | training_metrics | training_loss | validation_metrics_final | validation_loss_final | validation_metrics [...]
+---------+----------+---------------------------------------------------------------------------------+-----------------------+--------------+--------------+---------------------------------------------------------+--------------+------------------------+---------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------+-----------------------+---------------------------------------------- [...]
+ 3 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.164853096008301,0.264705181121826,0.157214879989624} | {accuracy} | 0.966666638851 | 0.0860336050391 | {0.966666638851166,0.875,0.966666638851166} | {0.10731378942728,0.237401679158211,0.0860336050391197} | 1 | 0.0690980628133 | {1,0.933333337306976,1} [...]
+ 9 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.182960987091064,0.192347049713135,0.185945987701416} | {accuracy} | 0.941666662693 | 0.16336736083 | {0.966666638851166,0.975000023841858,0.941666662693024} | {0.0924900621175766,0.0709503665566444,0.163367360830307} | 0.933333337307 | 0.196538448334 | {0.933333337306976,1,0.933333337306976} [...]
+ 11 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.187994003295898,0.181873798370361,0.188920021057129} | {accuracy} | 0.899999976158 | 0.329349696636 | {0.908333361148834,0.883333325386047,0.899999976158142} | {0.373963922262192,0.349062085151672,0.3293496966362} | 0.933333337307 | 0.333393543959 | {0.933333337306976,0.899999976158142,0.933333 [...]
+ 10 | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.184596061706543,0.379925012588501,0.186527013778687} | {accuracy} | 0.908333361149 | 0.202177524567 | {0.866666674613953,0.966666638851166,0.908333361148834} | {0.250507324934006,0.127185359597206,0.20217752456665} | 0.866666674614 | 0.268144398928 | {0.800000011920929,1,0.866666674613953} [...]
+ 12 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.187471866607666,0.19880199432373,0.180361032485962} | {accuracy} | 0.908333361149 | 0.408491075039 | {0.833333313465118,0.925000011920929,0.908333361148834} | {0.492850959300995,0.451007574796677,0.40849107503891} | 0.833333313465 | 0.449156552553 | {0.800000011920929,0.866666674613953,0.833333 [...]
+ 8 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 1.2197265625 | {0.187476873397827,0.190106868743896,0.183273077011108} | {accuracy} | 0.916666686535 | 0.204672813416 | {0.925000011920929,0.725000023841858,0.916666686534882} | {0.224613606929779,0.507380962371826,0.204672813415527} | 0.833333313465 | 0.253615111113 | {0.833333313465118,0.666666686534882,0.833333 [...]
+ 4 | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.158944129943848,0.190206050872803,0.162253141403198} | {accuracy} | 0.875 | 0.289537638426 | {0.908333361148834,0.949999988079071,0.875} | {0.177524402737617,0.134268626570702,0.289537638425827} | 0.800000011921 | 0.416592538357 | {0.933333337306976,0.933333337306976,0.800000 [...]
+ 6 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.17385196685791,0.167200088500977,0.15508508682251} | {accuracy} | 0.850000023842 | 0.874475240707 | {0.683333337306976,0.683333337306976,0.850000023841858} | {0.942066073417664,0.909405112266541,0.874475240707397} | 0.800000011921 | 0.901099026203 | {0.600000023841858,0.600000023841858,0.800000 [...]
+ 5 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.175297975540161,0.166980028152466,0.157662153244019} | {accuracy} | 0.791666686535 | 0.473440766335 | {0.691666662693024,0.758333325386047,0.791666686534882} | {0.555641710758209,0.510370552539825,0.473440766334534} | 0.699999988079 | 0.532045960426 | {0.600000023841858,0.666666686534882,0.699999 [...]
+ 7 | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 1.2197265625 | {0.370399951934814,0.185209989547729,0.184471845626831} | {accuracy} | 0.683333337307 | 0.467345923185 | {0.683333337306976,0.683333337306976,0.683333337306976} | {0.476419776678085,0.466614902019501,0.467345923185349} | 0.600000023842 | 0.441450744867 | {0.600000023841858,0.600000023841858,0.600000 [...]
+ 1 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1 | madlib_keras | 0.7900390625 | {0.161419153213501,0.168106079101562,0.156718969345093} | {accuracy} | 0.358333319426 | 1.11533606052 | {0.358333319425583,0.358333319425583,0.358333319425583} | {1.1051013469696,1.11064600944519,1.11533606052399} | 0.233333334327 | 1.15986251831 | {0.233333334326744,0.233333334326744,0.233333 [...]
+ 2 | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1 | madlib_keras | 0.7900390625 | {0.1615891456604,0.159286975860596,0.165864944458008} | {accuracy} | 0.358333319426 | 1.11393201351 | {0.358333319425583,0.358333319425583,0.358333319425583} | {1.11442768573761,1.10136640071869,1.11393201351166} | 0.233333334327 | 1.17370998859 | {0.233333334326744,0.233333334326744,0.233333 [...]
+(12 rows)
+</pre>
+Note that the loss and accuracy values pick up from where the previous run left off.
+
+@anchor notes
+@par Notes
+
+1. Refer to the deep learning section of the Apache MADlib
+wiki [6] for important information including supported libraries
+and versions.
+
+2. Only classification is currently supported; regression is not.
+
+3. On the effect of database cluster size: as the database cluster
+size increases, it will be proportionally faster to train a set of
+models, as long as you have at least as many model selection tuples
+as segments. This is because model state is "hopped" from
+segment to segment and training takes place in parallel. See [1,2]
+for details on how model hopping works. If you have fewer model selection
+tuples to train than segments, then some segments may not be busy 100%
+of the time so speedup will not necessarily be linear with database
+cluster size. Inference (predict) is an embarrassingly parallel
+operation so inference runtimes will be proportionally faster as the number
+of segments increases.
+
+@anchor background
+@par Technical Background
+
+For an introduction to deep learning foundations, including MLP and CNN,
+refer to [7].
+
+@anchor literature
+@literature
+
+@anchor mlp-lit-1
+[1] "Cerebro: Efficient and Reproducible Model Selection on Deep Learning Systems,"
+Supun Nakandala, Yuhao Zhang, and Arun Kumar, ACM SIGMOD 2019 DEEM Workshop,
+https://adalabucsd.github.io/papers/2019_Cerebro_DEEM.pdf
+
+[2] "Resource-Efficient and Reproducible Model Selection on Deep Learning Systems,"
+Supun Nakandala, Yuhao Zhang, and Arun Kumar, Technical Report, Computer Science and
+Engineering, University of California, San Diego
+https://adalabucsd.github.io/papers/TR_2019_Cerebro.pdf
+
+[3] https://keras.io/
+
+[4] https://www.tensorflow.org/
+
+[5] "Neural Networks for Machine Learning", Lectures 6a and 6b on mini-batch gradient descent,
+Geoffrey Hinton with Nitish Srivastava and Kevin Swersky,
+http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
+
+[6] Deep learning section of Apache MADlib wiki, https://cwiki.apache.org/confluence/display/MADLIB/Deep+Learning
+
+[7] Deep Learning, Ian Goodfellow, Yoshua Bengio and Aaron Courville, MIT Press, 2016.
+
+@anchor related
+@par Related Topics
+
+File madlib_keras_fit_multiple_model.sql_in documents the training, evaluation and prediction functions.
+
+*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit_multiple_model(
source_table VARCHAR,
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in
index 94bb121..d2418e4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.sql_in
@@ -31,7 +31,7 @@ m4_include(`SQLCommon.m4')
/**
@addtogroup grp_gpu_configuration
-@brief Utility function to report number and type of GPUs on the database cluster.
+@brief Utility function to report number and type of GPUs in the database cluster.
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index d776efd..8697ec4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -28,10 +28,10 @@
m4_include(`SQLCommon.m4')
/**
-@addtogroup grp_keras_model_selection
+@addtogroup grp_keras_setup_model_selection
-@brief Utility function to set up a model selection table
-for hyperparameter tuning and model architecture search.
+@brief Utility function to set up a model selection table for model architecture search
+and hyperparameter tuning.
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>