You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by fm...@apache.org on 2019/09/05 00:06:50 UTC
[madlib] 01/01: user docs for setting up model selection table
This is an automated email from the ASF dual-hosted git repository.
fmcquillan pushed a commit to branch load_mst_user_docs
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 2059ddbbe7c7fe9a2d2fd76f20796285a42da022
Author: Frank McQuillan <fm...@pivotal.io>
AuthorDate: Wed Sep 4 17:03:03 2019 -0700
user docs for setting up model selection table
---
doc/mainpage.dox.in | 2 +
.../madlib_keras_model_selection.sql_in | 336 +++++++++++++++++++++
2 files changed, 338 insertions(+)
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index daedce5..b31dedb 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -13,6 +13,7 @@ Useful links:
<li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
<li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
<li>User documentation for earlier releases:
+ <a href="../v1.16/index.html">v1.16</a>,
<a href="../v1.15.1/index.html">v1.15.1</a>,
<a href="../v1.15/index.html">v1.15</a>,
<a href="../v1.14/index.html">v1.14</a>,
@@ -292,6 +293,7 @@ Interface and implementation are subject to change.
@defgroup grp_keras Keras
@defgroup grp_keras_model_arch Load Model
@defgroup grp_input_preprocessor_dl Preprocessor for Images
+ @defgroup grp_keras_model_selection Setup Model Selection
@}
@defgroup grp_bayes Naive Bayes Classification
@defgroup grp_sample Random Sampling
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index f26a541..37914d4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -27,7 +27,343 @@
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_keras_model_selection
+@brief Utility function to set up a model selection table
+for hyperparameter tuning and model architecture search.
+
+\warning <em> This MADlib method is still in early stage development.
+Interface and implementation are subject to change. </em>
+
+<div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#load_mst_table">Load Model Selection Table</a></li>
+<li class="level1"><a href="#example">Examples</a></li>
+<li class="level1"><a href="#related">Related Topics</a></li>
+</ul></div>
+
+This utility function sets up a model selection table
+for use by the multiple model Keras fit feature of MADlib.
+By model selection we mean both hyperparameter tuning and
+model architecture search. The table defines the unique combinations
+of model architectures, compile and fit parameters for the tests
+to run on a massively parallel processing database cluster.
+
+@anchor load_mst_table
+@par Load Model Selection Table
+
+<pre class="syntax">
+load_model_selection_table(
+ model_arch_table,
+ model_selection_table,
+ model_arch_id_list,
+ compile_params_list,
+ fit_params_list
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+ <dt>model_arch_table</dt>
+ <dd>VARCHAR. Table containing model architectures and weights.
+ For more information on this table
+ refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+ </dd>
+
+ <dt>model_selection_table</dt>
+ <dd>VARCHAR. Model selection table created by this utility. Content of
+ this table is described below.
+ </dd>
+
+ <dt>model_arch_id_list</dt>
+ <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
+ in the run combinations. For hyperparameter search, this will typically be
+ one model ID. For model architecture search, this will be the different model IDs
+ that you want to test.
+ </dd>
+
+ <dt>compile_params_list</dt>
+ <dd>VARCHAR[]. Array of compile parameters to be tested. Each element
+ of the array should consist of a string of compile parameters
+ exactly as it is to be passed to Keras.
+ </dd>
+
+ <dt>fit_params_list</dt>
+ <dd>VARCHAR[]. Array of fit parameters to be tested. Each element
+ of the array should consist of a string of fit parameters
+ exactly as it is to be passed to Keras.
+ </dd>
+
+</dl>
+
+<b>Output table</b>
+<br>
+ The model selection output table contains the following columns:
+ <table class="output">
+ <tr>
+ <th>mst_key</th>
+ <td>INTEGER. ID that defines a unique
+ model architecture-compile parameters-fit parameters tuple.
+ </td>
+ </tr>
+ <tr>
+ <th>model_arch_table</th>
+ <td>VARCHAR. Name of the table corresponding to the model architecture ID.
+ </td>
+ </tr>
+ <tr>
+ <th>model_arch_id</th>
+ <td>INTEGER. Model architecture ID from the 'model_arch_table'.
+ </td>
+ </tr>
+ <tr>
+ <th>compile_params</th>
+ <td>VARCHAR. Keras compile parameters.
+ </td>
+ </tr>
+ <tr>
+ <th>fit_params</th>
+ <td>VARCHAR. Keras fit parameters.
+ </td>
+ </tr>
+ </table>
+<br>
+
+@anchor example
+@par Examples
+-# The model selection table works in conjunction with a model architecture table,
+so we first create a model architecture table with two different models. Use Keras to define
+a model architecture with 1 hidden layer:
+<pre class="example">
+import keras
+from keras.models import Sequential
+from keras.layers import Dense
+model1 = Sequential()
+model1.add(Dense(10, activation='relu', input_shape=(4,)))
+model1.add(Dense(10, activation='relu'))
+model1.add(Dense(3, activation='softmax'))
+model1.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type) Output Shape Param #
+=================================================================
+dense_1 (Dense) (None, 10) 50
+_________________________________________________________________
+dense_2 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_3 (Dense) (None, 3) 33
+=================================================================
+Total params: 193
+Trainable params: 193
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model1.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+Now use Keras to define
+a model architecture with 2 hidden layers:
+<pre class="example">
+model2 = Sequential()
+model2.add(Dense(10, activation='relu', input_shape=(4,)))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(3, activation='softmax'))
+model2.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type) Output Shape Param #
+=================================================================
+dense_4 (Dense) (None, 10) 50
+_________________________________________________________________
+dense_5 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_6 (Dense) (None, 10) 110
+_________________________________________________________________
+dense_7 (Dense) (None, 3) 33
+=================================================================
+Total params: 303
+Trainable params: 303
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model2.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+
+-# Load both models into the architecture table:
+<pre class="example">
+DROP TABLE IF EXISTS model_arch_library;
+SELECT madlib.load_keras_model('model_arch_library', -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json, -- JSON blob
+ NULL, -- Weights
+ 'Sophie', -- Name
+ 'MLP with 1 hidden layer' -- Descr
+);
+SELECT madlib.load_keras_model('model_arch_library', -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json, -- JSON blob
+ NULL, -- Weights
+ 'Maria', -- Name
+ 'MLP with 2 hidden layers' -- Descr
+);
+SELECT model_id, name, description FROM model_arch_library ORDER BY model_id;
+</pre>
+<pre class="result">
+ model_id | name | description
+----------+--------+--------------------------
+ 1 | Sophie | MLP with 1 hidden layer
+ 2 | Maria | MLP with 2 hidden layers
+(2 rows)
+</pre>
+
+-# Load model selection table. Select the model(s) from the model
+architecture table that you want to run, along with the compile and
+fit parameters. Unique combinations will be created for the set of
+model selection parameters:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table;
+SELECT madlib.load_model_selection_table('model_arch_library', -- model architecture table
+ 'mst_table', -- model selection table output
+ ARRAY[1,2], -- model ids from model architecture table
+ ARRAY[ -- compile params
+ $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$,
+ $$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy']$$,
+ $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$
+ ],
+ ARRAY[ -- fit params
+ $$batch_size=4,epochs=1$$,
+ $$batch_size=8,epochs=1$$
+ ]
+ );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_arch_table | model_arch_id | compile_params | fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+ 1 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 2 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 3 | model_arch_library | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 4 | model_arch_library | 1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 5 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 6 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 7 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 8 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 9 | model_arch_library | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 10 | model_arch_library | 2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 11 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 12 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(12 rows)
+</pre>
+
+-# Create model selection table manually. If you would like to have
+more control over the set of model selection parameters to run,
+you can manually create the model selection table.
+For example, let's say we don't want all combinations but only
+want 'batch_size=4' for 'model_id=1' and 'batch_size=8' for 'model_id=2':
+<pre class="example">
+DROP TABLE IF EXISTS mst_table_manual;
+CREATE TABLE mst_table_manual(
+ mst_key serial,
+ model_arch_table varchar,
+ model_arch_id integer,
+ compile_params varchar,
+ fit_params varchar
+);
+INSERT INTO mst_table_manual(model_arch_table, model_arch_id, compile_params, fit_params) VALUES
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=8,epochs=1');
+SELECT * FROM mst_table_manual ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_arch_table | model_arch_id | compile_params | fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+ 1 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 2 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 3 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+ 4 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 5 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 6 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(6 rows)
+</pre>
+
+-# Generate hyperparameters automatically. You can use other libraries or methods
+to generate hyperparameters according to the tests that you want to run.
+For example, let's randomly generate batch size from powers of 2 and learning rate on a log scale.
+We use psycopg which is a PostgreSQL database adapter for the Python programming language.
+<pre class="example">
+import numpy as np
+import psycopg2 as p2
+conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')
+#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')
+cur = conn.cursor()
+\#
+%sql DROP TABLE IF EXISTS mst_table_auto;
+\#
+\#compile params
+learning_rate = np.random.permutation([0.1,0.01,0.001,0.0001])[:3]
+compile_param1 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[0]) + ")',metrics=['accuracy']"
+compile_param2 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[1]) + ")',metrics=['accuracy']"
+compile_param3 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[2]) + ")',metrics=['accuracy']"
+compile_params = [compile_param1,compile_param2,compile_param3]
+\#
+\#fit params
+batch_size = np.random.permutation([4,8,16,32,64])[:2]
+fit_param1 = "batch_size=" + str(batch_size[0]) + ",epochs=1"
+fit_param2 = "batch_size=" + str(batch_size[1]) + ",epochs=1"
+fit_params = [fit_param1,fit_param2]
+\#
+query = "SELECT madlib.load_model_selection_table('model_arch_library', 'mst_table_auto', ARRAY[1,2], %s, %s);"
+\#
+cur.execute(query,[compile_params, fit_params])
+conn.commit()
+\#
+\# review model selection table
+%sql SELECT * FROM mst_table_auto ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key | model_arch_table | model_arch_id | compile_params | fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+------------------------
+ 1 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 2 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=64,epochs=1
+ 3 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 4 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=64,epochs=1
+ 5 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 6 | model_arch_library | 1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+ 7 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 8 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy'] | batch_size=64,epochs=1
+ 9 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 10 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=64,epochs=1
+ 11 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+ 12 | model_arch_library | 2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+(12 rows)
+</pre>
+
+@anchor related
+@par Related Topics
+
+See keras_model_arch_table.sql_in
+
+*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_model_selection_table(
model_arch_table VARCHAR,