You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by fm...@apache.org on 2019/09/05 00:06:50 UTC

[madlib] 01/01: user docs for setting up model selection table

This is an automated email from the ASF dual-hosted git repository.

fmcquillan pushed a commit to branch load_mst_user_docs
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 2059ddbbe7c7fe9a2d2fd76f20796285a42da022
Author: Frank McQuillan <fm...@pivotal.io>
AuthorDate: Wed Sep 4 17:03:03 2019 -0700

    user docs for setting up model selection table
---
 doc/mainpage.dox.in                                |   2 +
 .../madlib_keras_model_selection.sql_in            | 336 +++++++++++++++++++++
 2 files changed, 338 insertions(+)

diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index daedce5..b31dedb 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -13,6 +13,7 @@ Useful links:
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
 <li>User documentation for earlier releases:
+    <a href="../v1.16/index.html">v1.16</a>,
     <a href="../v1.15.1/index.html">v1.15.1</a>,
     <a href="../v1.15/index.html">v1.15</a>,
     <a href="../v1.14/index.html">v1.14</a>,
@@ -292,6 +293,7 @@ Interface and implementation are subject to change.
         @defgroup grp_keras Keras
         @defgroup grp_keras_model_arch Load Model
         @defgroup grp_input_preprocessor_dl Preprocessor for Images
+        @defgroup grp_keras_model_selection Setup Model Selection
     @}
     @defgroup grp_bayes Naive Bayes Classification
     @defgroup grp_sample Random Sampling
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index f26a541..37914d4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -27,7 +27,343 @@
  *//* ----------------------------------------------------------------------- */
 
 m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_keras_model_selection
 
+@brief Utility function to set up a model selection table
+for hyperparameter tuning and model architecture search.
+
+\warning <em> This MADlib method is still in early stage development.
+Interface and implementation are subject to change. </em>
+
+<div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#load_mst_table">Load Model Selection Table</a></li>
+<li class="level1"><a href="#example">Examples</a></li>
+<li class="level1"><a href="#related">Related Topics</a></li>
+</ul></div>
+
+This utility function sets up a model selection table
+for use by the multiple model Keras fit feature of MADlib.
+By model selection we mean both hyperparameter tuning and
+model architecture search.  The table defines the unique combinations
+of model architectures, compile and fit parameters for the tests
+to run on a massively parallel processing database cluster.
+
+@anchor load_mst_table
+@par Load Model Selection Table
+
+<pre class="syntax">
+load_model_selection_table(
+    model_arch_table,
+    model_selection_table,
+    model_arch_id_list,
+    compile_params_list,
+    fit_params_list
+    )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+  <dt>model_arch_table</dt>
+  <dd>VARCHAR. Table containing model architectures and weights.
+  For more information on this table
+  refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+  </dd>
+
+  <dt>model_selection_table</dt>
+  <dd>VARCHAR. Model selection table created by this utility.  Content of
+  this table is described below.
+  </dd>
+
+  <dt>model_arch_id_list</dt>
+  <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
+  in the run combinations.  For hyperparameter search, this will typically be
+  one model ID.  For model architecture search, this will be the different model IDs
+  that you want to test.
+  </dd>
+
+  <dt>compile_params_list</dt>
+  <dd>VARCHAR[]. Array of compile parameters to be tested.  Each element
+  of the array should consist of a string of compile parameters
+  exactly as it is to be passed to Keras.
+  </dd>
+
+  <dt>fit_params_list</dt>
+  <dd>VARCHAR[].  Array of fit parameters to be tested.  Each element
+  of the array should consist of a string of fit parameters
+  exactly as it is to be passed to Keras.
+  </dd>
+
+</dl>
+
+<b>Output table</b>
+<br>
+    The model selection output table contains the following columns:
+    <table class="output">
+      <tr>
+        <th>mst_key</th>
+        <td>INTEGER. ID that defines a unique
+        model architecture-compile parameters-fit parameters tuple.
+        </td>
+      </tr>
+      <tr>
+        <th>model_arch_table</th>
+        <td>VARCHAR. Name of the table corresponding to the model architecture ID.
+        </td>
+      </tr>
+      <tr>
+        <th>model_arch_id</th>
+        <td>INTEGER. Model architecture ID from the 'model_arch_table'.
+        </td>
+      </tr>
+      <tr>
+        <th>compile_params</th>
+        <td>VARCHAR. Keras compile parameters.
+        </td>
+      </tr>
+      <tr>
+        <th>fit_params</th>
+        <td>VARCHAR. Keras fit parameters.
+        </td>
+      </tr>
+    </table>
+<br>
+
+@anchor example
+@par Examples
+-# The model selection table works in conjunction with a model architecture table,
+so we first create a model architecture table with two different models.  Use Keras to define
+a model architecture with 1 hidden layer:
+<pre class="example">
+import keras
+from keras.models import Sequential
+from keras.layers import Dense
+model1 = Sequential()
+model1.add(Dense(10, activation='relu', input_shape=(4,)))
+model1.add(Dense(10, activation='relu'))
+model1.add(Dense(3, activation='softmax'))
+model1.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense_1 (Dense)              (None, 10)                50
+_________________________________________________________________
+dense_2 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_3 (Dense)              (None, 3)                 33
+=================================================================
+Total params: 193
+Trainable params: 193
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model1.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+Now use Keras to define
+a model architecture with 2 hidden layers:
+<pre class="example">
+model2 = Sequential()
+model2.add(Dense(10, activation='relu', input_shape=(4,)))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(3, activation='softmax'))
+model2.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense_4 (Dense)              (None, 10)                50
+_________________________________________________________________
+dense_5 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_6 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_7 (Dense)              (None, 3)                 33
+=================================================================
+Total params: 303
+Trainable params: 303
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model2.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+
+-# Load both models into the architecture table:
+<pre class="example">
+DROP TABLE IF EXISTS model_arch_library;
+SELECT madlib.load_keras_model('model_arch_library',  -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json,         -- JSON blob
+                               NULL,                  -- Weights
+                               'Sophie',              -- Name
+                               'MLP with 1 hidden layer'       -- Descr
+);
+SELECT madlib.load_keras_model('model_arch_library',  -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json,         -- JSON blob
+                               NULL,                  -- Weights
+                               'Maria',               -- Name
+                               'MLP with 2 hidden layers'       -- Descr
+);
+SELECT model_id, name, description FROM model_arch_library ORDER BY model_id;
+</pre>
+<pre class="result">
+ model_id |  name  |       description
+----------+--------+--------------------------
+        1 | Sophie | MLP with 1 hidden layer
+        2 | Maria  | MLP with 2 hidden layers
+(2 rows)
+</pre>
+
+-# Load model selection table.  Select the model(s) from the model
+architecture table that you want to run, along with the compile and
+fit parameters.  Unique combinations will be created for the set of
+model selection parameters:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table;
+SELECT madlib.load_model_selection_table('model_arch_library', -- model architecture table
+                                         'mst_table',          -- model selection table output
+                                          ARRAY[1,2],          -- model ids from model architecture table
+                                          ARRAY[               -- compile params
+                                              $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$,
+                                              $$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy']$$,
+                                              $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$
+                                          ],
+                                          ARRAY[               -- fit params
+                                              $$batch_size=4,epochs=1$$,
+                                              $$batch_size=8,epochs=1$$
+                                          ]
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |      fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+       4 | model_arch_library |             1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+       5 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+       6 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+       7 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       8 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       9 | model_arch_library |             2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+      10 | model_arch_library |             2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+      11 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+      12 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(12 rows)
+</pre>
+
+-# Create model selection table manually.  If you would like to have
+more control over the set of model selection parameters to run,
+you can manually create the model selection table.
+For example, let's say we don't want all combinations but only
+want 'batch_size=4' for 'model_id=1' and 'batch_size=8' for 'model_id=2':
+<pre class="example">
+DROP TABLE IF EXISTS mst_table_manual;
+CREATE TABLE mst_table_manual(
+    mst_key serial,
+    model_arch_table varchar,
+    model_arch_id integer,
+    compile_params varchar,
+    fit_params varchar
+);
+INSERT INTO mst_table_manual(model_arch_table, model_arch_id, compile_params, fit_params) VALUES
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=8,epochs=1');
+SELECT * FROM mst_table_manual ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |      fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=4,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+       4 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       5 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+       6 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(6 rows)
+</pre>
+
+-# Generate hyperparameters automatically.  You can use other libraries or methods
+to generate hyperparameters according to the tests that you want to run.
+For example, let's randomly generate batch size from powers of 2 and learning rate on a log scale.
+We use psycopg which is a PostgreSQL database adapter for the Python programming language.
+<pre class="example">
+import numpy as np
+import psycopg2 as p2
+conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')
+#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')
+cur = conn.cursor()
+\#
+%sql DROP TABLE IF EXISTS mst_table_auto;
+\#
+\#compile params
+learning_rate = np.random.permutation([0.1,0.01,0.001,0.0001])[:3]
+compile_param1 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[0]) + ")',metrics=['accuracy']"
+compile_param2 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[1]) + ")',metrics=['accuracy']"
+compile_param3 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[2]) + ")',metrics=['accuracy']"
+compile_params = [compile_param1,compile_param2,compile_param3]
+\#
+\#fit params
+batch_size = np.random.permutation([4,8,16,32,64])[:2]
+fit_param1 = "batch_size=" + str(batch_size[0]) + ",epochs=1"
+fit_param2 = "batch_size=" + str(batch_size[1]) + ",epochs=1"
+fit_params = [fit_param1,fit_param2]
+\#
+query = "SELECT madlib.load_model_selection_table('model_arch_library', 'mst_table_auto', ARRAY[1,2], %s, %s);"
+\#
+cur.execute(query,[compile_params, fit_params])
+conn.commit()
+\#
+\# review model selection table
+%sql SELECT * FROM mst_table_auto ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |       fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+------------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=64,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+       4 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=64,epochs=1
+       5 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+       6 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+       7 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       8 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=64,epochs=1
+       9 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+      10 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=64,epochs=1
+      11 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+      12 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+(12 rows)
+</pre>
+
+@anchor related
+@par Related Topics
+
+See keras_model_arch_table.sql_in
+
+*/
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_model_selection_table(
     model_arch_table        VARCHAR,