You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/06/11 21:41:11 UTC

[madlib] 02/02: DL: Add online docs for madlib_keras functions

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 0ed2df5a6407e87e9a215dd0f303aeec6fbe281c
Author: Orhan Kislal <ok...@pivotal.io>
AuthorDate: Tue Jun 11 14:40:35 2019 -0700

    DL: Add online docs for madlib_keras functions
    
    JIRA: MADLIB-1307
    
    Closes #409
---
 .../deep_learning/keras_model_arch_table.py_in     |   8 +-
 .../modules/deep_learning/madlib_keras.py_in       | 135 +++++++++++++++++++++
 .../modules/deep_learning/madlib_keras.sql_in      |  47 +++++++
 .../deep_learning/madlib_keras_predict.py_in       |  62 ++++++++++
 4 files changed, 250 insertions(+), 2 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
index 28ab753..7d91540 100644
--- a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
+++ b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
@@ -165,8 +165,12 @@ class KerasModelArchDocumentation:
                                         USAGE
         ---------------------------------------------------------------------------
         SELECT {schema_madlib}.{method}(
-            keras_model_arch_table VARCHAR, -- Output table to load keras model arch.
-            model_arch             JSON     -- JSON of the model architecture to insert.
+            keras_model_arch_table, --  Output table to load keras model arch.
+            model_arch,             --  JSON of the model architecture to insert.
+            model_weights,          --  Model weights to load as a PostgreSQL
+                                        binary data type.
+            name,                   --  Free text string to identify a name
+            description             --  Free text string to provide a description
         );
 
 
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 8187d8d..a85e1f3 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -670,3 +670,138 @@ def internal_keras_eval_final(state, **kwargs):
     metric /= image_count
 
     return loss, metric
+
+
+
+def fit_help(schema_madlib, message, **kwargs):
+    """
+    Help function for keras fit
+
+    Args:
+        @param schema_madlib
+        @param message: string, Help message string
+        @param kwargs
+
+    Returns:
+        String. Help/usage information
+    """
+    if not message:
+        help_string = """
+-----------------------------------------------------------------------
+                            SUMMARY
+-----------------------------------------------------------------------
+This module allows you to use SQL to call deep learning
+models designed in Keras, which is a high-level neural
+network API written in Python.
+Keras was developed for fast experimentation.  It can run
+on top of different backends and the one that is currently
+supported by MADlib is TensorFlow.  The implementation
+in MADlib is distributed and designed to train
+a single large model across multiple segments (workers)
+in a Greenplum database.  PostgreSQL is also supported.
+
+For more details on function usage:
+    SELECT {schema_madlib}.madlib_keras_fit('usage')
+            """
+    elif message in ['usage', 'help', '?']:
+        help_string = """
+-----------------------------------------------------------------------
+                            USAGE
+-----------------------------------------------------------------------
+ SELECT {schema_madlib}.madlib_keras_fit(
+    source_table,               --  Name of the table containing the
+                                    training data
+    model,                      --  Name of the output table containing
+                                    the model
+    model_arch_table,           --  Name of the table containing the
+                                    model architecture
+    model_arch_id,              --  This is the id in 'model_arch_table'
+                                    containing the model architecture
+    compile_params,             --  Parameters passed to the compile
+                                    method of the Keras model class
+    fit_params,                 --  Parameters passed to the fit method
+                                    of the Keras model class
+    num_iterations,             --  Number of iterations to train.
+    gpus_per_host,              --  Number of GPUs per segment host to
+                                    be used for training
+    validation_table,           --  Name of the table containing
+                                    the validation dataset
+    metrics_compute_frequency,  --  Frequency to compute per-iteration
+                                    metrics
+    warm_start,                 --  Flag to enable warm start
+    name,                       --  Free text string to identify a name
+    description                 --  Free text string to provide a description
+ );
+
+
+-----------------------------------------------------------------------
+                            OUTPUT
+-----------------------------------------------------------------------
+The output table ('model' above) contains the following columns:
+
+model_data: Byte array containing the weights of the neural net.
+model_arch: A JSON representation of the model architecture used in
+            training.
+
+A summary table ('<model>_summary') is created to store various training
+statistics as well as the input parameters.
+"""
+    else:
+        help_string = "No such option. Use {schema_madlib}.madlib_keras_fit()"
+
+    return help_string.format(schema_madlib=schema_madlib)
+# ---------------------------------------------------------------------
+
+
+def evaluate_help(schema_madlib, message, **kwargs):
+    """
+    Help function for keras evaluate
+
+    Args:
+        @param schema_madlib
+        @param message: string, Help message string
+        @param kwargs
+
+    Returns:
+        String. Help/usage information
+    """
+    if not message:
+        help_string = """
+-----------------------------------------------------------------------
+                            SUMMARY
+-----------------------------------------------------------------------
+This function allows the user to evaluate a madlib_keras_fit trained
+model.
+
+For more details on function usage:
+    SELECT {schema_madlib}.madlib_keras_evaluate('usage')
+            """
+    elif message in ['usage', 'help', '?']:
+        help_string = """
+-----------------------------------------------------------------------
+                            USAGE
+-----------------------------------------------------------------------
+ SELECT {schema_madlib}.madlib_keras_evaluate(
+    model_table,    --  Name of the table containing the model
+    test_table,     --  Name of the table containing the evaluation dataset
+    output_table,   --  Name of the output table
+    gpus_per_host   --  Number of GPUs per segment host to
+                        be used for training
+ );
+
+
+-----------------------------------------------------------------------
+                            OUTPUT
+-----------------------------------------------------------------------
+The output table ('output_table' above) contains the following columns:
+
+loss:           Loss value on evaluation dataset.
+metric:         Metric value on evaluation dataset, where 'metrics_type'
+                below identifies the type of metric.
+metrics_type:   Type of metric that was used in the training step.
+"""
+    else:
+        help_string = "No such option. Use {schema_madlib}.madlib_keras_evaluate()"
+
+    return help_string.format(schema_madlib=schema_madlib)
+# ---------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index a34727b..2bdd43f 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -1707,3 +1707,50 @@ CREATE AGGREGATE MADLIB_SCHEMA.internal_keras_evaluate(
     FINALFUNC=MADLIB_SCHEMA.internal_keras_eval_final
 );
 
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
+     message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
+    with AOControl(False):
+        return madlib_keras.fit_help(**globals())
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.madlib_keras_fit('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate(
+     message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
+    with AOControl(False):
+        return madlib_keras.evaluate_help(**globals())
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.madlib_keras_evaluate('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
+     message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict')
+    with AOControl(False):
+        return madlib_keras_predict.predict_help(**globals())
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.madlib_keras_predict('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index f333f04..b5f35dc 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -160,3 +160,65 @@ def internal_keras_predict(independent_var, model_architecture, model_data,
         SD.pop(row_count_key, None)
         clear_keras_session()
         plpy.error(ex)
+
+
+def predict_help(schema_madlib, message, **kwargs):
+    """
+    Help function for keras predict
+
+    Args:
+        @param schema_madlib
+        @param message: string, Help message string
+        @param kwargs
+
+    Returns:
+        String. Help/usage information
+    """
+    if not message:
+        help_string = """
+-----------------------------------------------------------------------
+                            SUMMARY
+-----------------------------------------------------------------------
+This function allows the user to predict using a madlib_keras_fit trained
+model.
+
+For more details on function usage:
+    SELECT {schema_madlib}.madlib_keras_predict('usage')
+            """
+    elif message in ['usage', 'help', '?']:
+        help_string = """
+-----------------------------------------------------------------------
+                            USAGE
+-----------------------------------------------------------------------
+ SELECT {schema_madlib}.madlib_keras_predict(
+    model_table,    --  Name of the table containing the model
+    test_table,     --  Name of the table containing the evaluation dataset
+    id_col,         --  Name of the id column in the test data table
+    independent_varname,    --  Name of the column with independent
+                                variables in the test table
+    output_table,   --  Name of the output table
+    pred_type,      --  The type of the desired output
+    gpus_per_host   --  Number of GPUs per segment host to
+                        be used for training
+ );
+
+
+-----------------------------------------------------------------------
+                            OUTPUT
+-----------------------------------------------------------------------
+The output table ('output_table' above) contains the following columns:
+
+id:                 Gives the 'id' for each prediction, corresponding
+                    to each row from the test_table.
+estimated_COL_NAME: (For pred_type='response') The estimated class for
+                    classification, where COL_NAME is the name of the
+                    column to be predicted from test data.
+prob_CLASS:         (For pred_type='prob' for classification) The
+                    probability of a given class. There will be one
+                    column for each class in the training data.
+"""
+    else:
+        help_string = "No such option. Use {schema_madlib}.madlib_keras_predict()"
+
+    return help_string.format(schema_madlib=schema_madlib)
+# ---------------------------------------------------------------------