You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by fm...@apache.org on 2019/09/05 00:06:49 UTC

[madlib] branch load_mst_user_docs created (now 2059ddb)

This is an automated email from the ASF dual-hosted git repository.

fmcquillan pushed a change to branch load_mst_user_docs
in repository https://gitbox.apache.org/repos/asf/madlib.git.


      at 2059ddb  user docs for setting up model selection table

This branch includes the following new commits:

     new 2059ddb  user docs for setting up model selection table

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[madlib] 01/01: user docs for setting up model selection table

Posted by fm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

fmcquillan pushed a commit to branch load_mst_user_docs
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 2059ddbbe7c7fe9a2d2fd76f20796285a42da022
Author: Frank McQuillan <fm...@pivotal.io>
AuthorDate: Wed Sep 4 17:03:03 2019 -0700

    user docs for setting up model selection table
---
 doc/mainpage.dox.in                                |   2 +
 .../madlib_keras_model_selection.sql_in            | 336 +++++++++++++++++++++
 2 files changed, 338 insertions(+)

diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index daedce5..b31dedb 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -13,6 +13,7 @@ Useful links:
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
 <li>User documentation for earlier releases:
+    <a href="../v1.16/index.html">v1.16</a>,
     <a href="../v1.15.1/index.html">v1.15.1</a>,
     <a href="../v1.15/index.html">v1.15</a>,
     <a href="../v1.14/index.html">v1.14</a>,
@@ -292,6 +293,7 @@ Interface and implementation are subject to change.
         @defgroup grp_keras Keras
         @defgroup grp_keras_model_arch Load Model
         @defgroup grp_input_preprocessor_dl Preprocessor for Images
+        @defgroup grp_keras_model_selection Setup Model Selection
     @}
     @defgroup grp_bayes Naive Bayes Classification
     @defgroup grp_sample Random Sampling
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
index f26a541..37914d4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.sql_in
@@ -27,7 +27,343 @@
  *//* ----------------------------------------------------------------------- */
 
 m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_keras_model_selection
 
+@brief Utility function to set up a model selection table
+for hyperparameter tuning and model architecture search.
+
+\warning <em> This MADlib method is still in early stage development.
+Interface and implementation are subject to change. </em>
+
+<div class="toc"><b>Contents</b><ul>
+<li class="level1"><a href="#load_mst_table">Load Model Selection Table</a></li>
+<li class="level1"><a href="#example">Examples</a></li>
+<li class="level1"><a href="#related">Related Topics</a></li>
+</ul></div>
+
+This utility function sets up a model selection table
+for use by the multiple model Keras fit feature of MADlib.
+By model selection we mean both hyperparameter tuning and
+model architecture search.  The table defines the unique combinations
+of model architectures, compile and fit parameters for the tests
+to run on a massively parallel processing database cluster.
+
+@anchor load_mst_table
+@par Load Model Selection Table
+
+<pre class="syntax">
+load_model_selection_table(
+    model_arch_table,
+    model_selection_table,
+    model_arch_id_list,
+    compile_params_list,
+    fit_params_list
+    )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+  <dt>model_arch_table</dt>
+  <dd>VARCHAR. Table containing model architectures and weights.
+  For more information on this table
+  refer to <a href="group__grp__keras__model__arch.html">Load Model</a>.
+  </dd>
+
+  <dt>model_selection_table</dt>
+  <dd>VARCHAR. Model selection table created by this utility.  Content of
+  this table is described below.
+  </dd>
+
+  <dt>model_arch_id_list</dt>
+  <dd>INTEGER[]. Array of model IDs from the 'model_arch_table' to be included
+  in the run combinations.  For hyperparameter search, this will typically be
+  one model ID.  For model architecture search, this will be the different model IDs
+  that you want to test.
+  </dd>
+
+  <dt>compile_params_list</dt>
+  <dd>VARCHAR[]. Array of compile parameters to be tested.  Each element
+  of the array should consist of a string of compile parameters
+  exactly as it is to be passed to Keras.
+  </dd>
+
+  <dt>fit_params_list</dt>
+  <dd>VARCHAR[].  Array of fit parameters to be tested.  Each element
+  of the array should consist of a string of fit parameters
+  exactly as it is to be passed to Keras.
+  </dd>
+
+</dl>
+
+<b>Output table</b>
+<br>
+    The model selection output table contains the following columns:
+    <table class="output">
+      <tr>
+        <th>mst_key</th>
+        <td>INTEGER. ID that defines a unique
+        model architecture-compile parameters-fit parameters tuple.
+        </td>
+      </tr>
+      <tr>
+        <th>model_arch_table</th>
+        <td>VARCHAR. Name of the table corresponding to the model architecture ID.
+        </td>
+      </tr>
+      <tr>
+        <th>model_arch_id</th>
+        <td>INTEGER. Model architecture ID from the 'model_arch_table'.
+        </td>
+      </tr>
+      <tr>
+        <th>compile_params</th>
+        <td>VARCHAR. Keras compile parameters.
+        </td>
+      </tr>
+      <tr>
+        <th>fit_params</th>
+        <td>VARCHAR. Keras fit parameters.
+        </td>
+      </tr>
+    </table>
+
+
+@anchor example
+@par Examples
+-# The model selection table works in conjunction with a model architecture table,
+so we first create a model architecture table with two different models.  Use Keras to define
+a model architecture with 1 hidden layer:
+<pre class="example">
+import keras
+from keras.models import Sequential
+from keras.layers import Dense
+model1 = Sequential()
+model1.add(Dense(10, activation='relu', input_shape=(4,)))
+model1.add(Dense(10, activation='relu'))
+model1.add(Dense(3, activation='softmax'))
+model1.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense_1 (Dense)              (None, 10)                50
+_________________________________________________________________
+dense_2 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_3 (Dense)              (None, 3)                 33
+=================================================================
+Total params: 193
+Trainable params: 193
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model1.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+Now use Keras to define
+a model architecture with 2 hidden layers:
+<pre class="example">
+model2 = Sequential()
+model2.add(Dense(10, activation='relu', input_shape=(4,)))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(10, activation='relu'))
+model2.add(Dense(3, activation='softmax'))
+model2.summary()
+\verbatim
+
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense_4 (Dense)              (None, 10)                50
+_________________________________________________________________
+dense_5 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_6 (Dense)              (None, 10)                110
+_________________________________________________________________
+dense_7 (Dense)              (None, 3)                 33
+=================================================================
+Total params: 303
+Trainable params: 303
+Non-trainable params: 0
+\endverbatim
+</pre>
+Export the model to JSON:
+<pre class="example">
+model2.to_json()
+</pre>
+<pre class="result">
+'{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "u [...]
+</pre>
+
+-# Load both models into the architecture table:
+<pre class="example">
+DROP TABLE IF EXISTS model_arch_library;
+SELECT madlib.load_keras_model('model_arch_library',  -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json,         -- JSON blob
+                               NULL,                  -- Weights
+                               'Sophie',              -- Name
+                               'MLP with 1 hidden layer'       -- Descr
+);
+SELECT madlib.load_keras_model('model_arch_library',  -- Output table,
+$$
+{"class_name": "Sequential", "keras_version": "2.1.6", "config": [{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_4", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "dtype": "float32", "activation": "relu", "trainable": true, "kernel_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "un [...]
+$$
+::json,         -- JSON blob
+                               NULL,                  -- Weights
+                               'Maria',               -- Name
+                               'MLP with 2 hidden layers'       -- Descr
+);
+SELECT model_id, name, description FROM model_arch_library ORDER BY model_id;
+</pre>
+<pre class="result">
+ model_id |  name  |       description
+----------+--------+--------------------------
+        1 | Sophie | MLP with 1 hidden layer
+        2 | Maria  | MLP with 2 hidden layers
+(2 rows)
+</pre>
+
+-# Load model selection table.  Select the model(s) from the model
+architecture table that you want to run, along with the compile and
+fit parameters.  Unique combinations will be created for the set of
+model selection parameters:
+<pre class="example">
+DROP TABLE IF EXISTS mst_table;
+SELECT madlib.load_model_selection_table('model_arch_library', -- model architecture table
+                                         'mst_table',          -- model selection table output
+                                          ARRAY[1,2],          -- model ids from model architecture table
+                                          ARRAY[               -- compile params
+                                              $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$,
+                                              $$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy']$$,
+                                              $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$
+                                          ],
+                                          ARRAY[               -- fit params
+                                              $$batch_size=4,epochs=1$$,
+                                              $$batch_size=8,epochs=1$$
+                                          ]
+                                         );
+SELECT * FROM mst_table ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |      fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+       4 | model_arch_library |             1 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+       5 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+       6 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+       7 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       8 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       9 | model_arch_library |             2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=4,epochs=1
+      10 | model_arch_library |             2 | loss='categorical_crossentropy', optimizer='Adam(lr=0.01)',metrics=['accuracy'] | batch_size=8,epochs=1
+      11 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+      12 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(12 rows)
+</pre>
+
+-# Create model selection table manually.  If you would like to have
+more control over the set of model selection parameters to run,
+you can manually create the model selection table.
+For example, let's say we don't want all combinations but only
+want 'batch_size=4' for 'model_id=1' and 'batch_size=8' for 'model_id=2':
+<pre class="example">
+DROP TABLE IF EXISTS mst_table_manual;
+CREATE TABLE mst_table_manual(
+    mst_key serial,
+    model_arch_table varchar,
+    model_arch_id integer,
+    compile_params varchar,
+    fit_params varchar
+);
+INSERT INTO mst_table_manual(model_arch_table, model_arch_id, compile_params, fit_params) VALUES
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 1, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=4,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']$$, 'batch_size=8,epochs=1'),
+('model_arch_library', 2, $$loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy']$$, 'batch_size=8,epochs=1');
+SELECT * FROM mst_table_manual ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |      fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+-----------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=4,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=4,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=4,epochs=1
+       4 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       5 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+       6 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+(6 rows)
+</pre>
+
+-# Generate hyperparameters automatically.  You can use other libraries or methods
+to generate hyperparameters according to the tests that you want to run.
+For example, let's randomly generate batch size from powers of 2 and learning rate on a log scale.
+We use psycopg which is a PostgreSQL database adapter for the Python programming language.
+<pre class="example">
+import numpy as np
+import psycopg2 as p2
+conn = p2.connect('postgresql://gpadmin@35.239.240.26:5432/madlib')
+#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')
+cur = conn.cursor()
+\#
+%sql DROP TABLE IF EXISTS mst_table_auto;
+\#
+\#compile params
+learning_rate = np.random.permutation([0.1,0.01,0.001,0.0001])[:3]
+compile_param1 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[0]) + ")',metrics=['accuracy']"
+compile_param2 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[1]) + ")',metrics=['accuracy']"
+compile_param3 = "loss='categorical_crossentropy',optimizer='Adam(lr=" + str(learning_rate[2]) + ")',metrics=['accuracy']"
+compile_params = [compile_param1,compile_param2,compile_param3]
+\#
+\#fit params
+batch_size = np.random.permutation([4,8,16,32,64])[:2]
+fit_param1 = "batch_size=" + str(batch_size[0]) + ",epochs=1"
+fit_param2 = "batch_size=" + str(batch_size[1]) + ",epochs=1"
+fit_params = [fit_param1,fit_param2]
+\#
+query = "SELECT madlib.load_model_selection_table('model_arch_library', 'mst_table_auto', ARRAY[1,2], %s, %s);"
+\#
+cur.execute(query,[compile_params, fit_params])
+conn.commit()
+\#
+\# review model selection table
+%sql SELECT * FROM mst_table_auto ORDER BY mst_key;
+</pre>
+<pre class="result">
+ mst_key |  model_arch_table  | model_arch_id |                                 compile_params                                  |       fit_params
+---------+--------------------+---------------+---------------------------------------------------------------------------------+------------------------
+       1 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       2 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=64,epochs=1
+       3 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+       4 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=64,epochs=1
+       5 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+       6 | model_arch_library |             1 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+       7 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=8,epochs=1
+       8 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.1)',metrics=['accuracy']   | batch_size=64,epochs=1
+       9 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=8,epochs=1
+      10 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.01)',metrics=['accuracy']  | batch_size=64,epochs=1
+      11 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=8,epochs=1
+      12 | model_arch_library |             2 | loss='categorical_crossentropy',optimizer='Adam(lr=0.001)',metrics=['accuracy'] | batch_size=64,epochs=1
+(12 rows)
+</pre>
+
+@anchor related
+@par Related Topics
+
+See keras_model_arch_table.sql_in
+
+*/
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_model_selection_table(
     model_arch_table        VARCHAR,