Posted to commits@madlib.apache.org by ok...@apache.org on 2017/08/29 20:42:16 UTC
[39/50] [abbrv] incubator-madlib git commit: MLP: Add multiple enhancements
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/b7fdb804/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 6cea7b0..550d630 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -16,7 +16,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
"""
@file mlp_igd.py_in
@@ -24,17 +23,18 @@
@namespace mlp_igd
"""
+import math
import plpy
-from utilities.control import MinWarning
from utilities.utilities import add_postfix
from utilities.utilities import py_list_to_sql_string
from utilities.utilities import extract_keyvalue_params
from utilities.utilities import _assert
+from utilities.utilities import _assert_equal
from utilities.utilities import unique_string
from utilities.utilities import strip_end_quotes
-
from utilities.validate_args import cols_in_tbl_valid
+from utilities.validate_args import table_exists
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import is_var_valid
from utilities.validate_args import output_tbl_valid
@@ -42,10 +42,14 @@ from utilities.validate_args import get_expr_type
from utilities.validate_args import array_col_has_same_dimension
from utilities.validate_args import array_col_dimension
+from convex.utils_regularization import __utils_ind_var_scales
+
+from elastic_net.elastic_net_utils import _tbl_dimension_rownum
+
def mlp(schema_madlib, source_table, output_table, independent_varname,
- dependent_varname, hidden_layer_sizes,
- optimizer_param_str, activation, is_classification, **kwargs):
+ dependent_varname, hidden_layer_sizes, optimizer_param_str, activation,
+ is_classification, weights, warm_start, verbose=False):
"""
Args:
@param schema_madlib
@@ -59,62 +63,128 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
Returns:
None
"""
- with MinWarning('warning'):
- optimizer_params = _get_optimizer_params(optimizer_param_str or "")
- summary_table = add_postfix(output_table, "_summary")
- _validate_args(source_table, output_table, summary_table, independent_varname,
- dependent_varname, hidden_layer_sizes,
- optimizer_params, is_classification)
-
- current_iteration = 1
- prev_state = None
- tolerance = optimizer_params["tolerance"]
- n_iterations = optimizer_params["n_iterations"]
- step_size = optimizer_params["step_size"]
- n_tries = optimizer_params["n_tries"]
- activation_name = _get_activation_function_name(activation)
- activation_index = _get_activation_index(activation_name)
- num_input_nodes = array_col_dimension(
- source_table, independent_varname)
- num_output_nodes = 0
- classes = []
- dependent_type = get_expr_type(dependent_varname, source_table)
- original_dependent_varname = dependent_varname
-
- if is_classification:
- dependent_variable_sql = """
- SELECT DISTINCT {dependent_varname}
- FROM {source_table}
- """.format(dependent_varname=dependent_varname,
- source_table=source_table)
- labels = plpy.execute(dependent_variable_sql)
- one_hot_dependent_varname = 'ARRAY['
- num_output_nodes = len(labels)
- for label_obj in labels:
- label = _format_label(label_obj[dependent_varname])
- classes.append(label)
- one_hot_dependent_varname += dependent_varname + \
- "=" + str(label) + ","
- # Remove the last comma
- one_hot_dependent_varname = one_hot_dependent_varname[:-1]
- one_hot_dependent_varname += ']::integer[]'
- dependent_varname = one_hot_dependent_varname
- else:
- if "[]" not in dependent_type:
- dependent_varname = "ARRAY[" + dependent_varname + "]"
- num_output_nodes = array_col_dimension(
- source_table, dependent_varname)
- layer_sizes = [num_input_nodes] + \
- hidden_layer_sizes + [num_output_nodes]
+ warm_start = bool(warm_start)
+ optimizer_params = _get_optimizer_params(optimizer_param_str or "")
+ summary_table = add_postfix(output_table, "_summary")
+ weights = '1' if not weights or not weights.strip() else weights.strip()
+ hidden_layer_sizes = hidden_layer_sizes or []
+ activation = _get_activation_function_name(activation)
+ learning_rate_policy = _get_learning_rate_policy_name(
+ optimizer_params["learning_rate_policy"])
+ activation_index = _get_activation_index(activation)
+
+ _validate_args(source_table, output_table, summary_table, independent_varname,
+ dependent_varname, hidden_layer_sizes,
+ optimizer_params, is_classification, weights,
+ warm_start, activation)
+
+ current_iteration = 1
+ prev_state = None
+ tolerance = optimizer_params["tolerance"]
+ n_iterations = optimizer_params["n_iterations"]
+ step_size_init = optimizer_params["learning_rate_init"]
+ iterations_per_step = optimizer_params["iterations_per_step"]
+ power = optimizer_params["power"]
+ gamma = optimizer_params["gamma"]
+ step_size = step_size_init
+ n_tries = optimizer_params["n_tries"]
+ # lambda is a reserved word in python
+ lmbda = optimizer_params["lambda"]
+ num_input_nodes = array_col_dimension(source_table,
+ independent_varname)
+ num_output_nodes = 0
+ classes = []
+ dependent_type = get_expr_type(dependent_varname, source_table)
+ original_dependent_varname = dependent_varname
+ dimension, n_tuples = _tbl_dimension_rownum(
+ schema_madlib, source_table, independent_varname)
+ x_scales = __utils_ind_var_scales(
+ source_table, independent_varname, dimension, schema_madlib)
+ x_means = py_list_to_sql_string(
+ x_scales["mean"], array_type="DOUBLE PRECISION")
+ filtered_stds = [x if x != 0 else 1 for x in x_scales["std"]]
+ x_stds = py_list_to_sql_string(
+ filtered_stds, array_type="DOUBLE PRECISION")
+ if is_classification:
+ dependent_variable_sql = """
+ SELECT DISTINCT {dependent_varname}
+ FROM {source_table}
+ """.format(
+ dependent_varname=dependent_varname, source_table=source_table)
+ labels = plpy.execute(dependent_variable_sql)
+ one_hot_dependent_varname = 'ARRAY['
+ num_output_nodes = len(labels)
+ for label_obj in labels:
+ label = _format_label(label_obj[dependent_varname])
+ classes.append(label)
+ classes.sort()
+ for c in classes:
+ one_hot_dependent_varname += dependent_varname + \
+ "=" + str(c) + ","
+ # Remove the last comma
+ one_hot_dependent_varname = one_hot_dependent_varname[:-1]
+ one_hot_dependent_varname += ']::integer[]'
+ dependent_varname = one_hot_dependent_varname
+ else:
+ if "[]" not in dependent_type:
+ dependent_varname = "ARRAY[" + dependent_varname + "]"
+ num_output_nodes = array_col_dimension(
+ source_table, dependent_varname)
+ layer_sizes = [num_input_nodes] + \
+ hidden_layer_sizes + [num_output_nodes]
+
+ # Need layers sizes before validating for warm_start
+ coeff = []
+ for i in range(len(layer_sizes) - 1):
+ fan_in = layer_sizes[i]
+ fan_out = layer_sizes[i + 1]
+ # Initialize according to Glorot and Bengio (2010)
+ # See design doc for more info
+ span = math.sqrt(6.0 / (fan_in + fan_out))
+ dim = (layer_sizes[i] + 1) * layer_sizes[i + 1]
+ rand = plpy.execute("""SELECT array_agg({span}*(random()-0.5))
+ AS random
+ FROM generate_series(0,{dim})
+ """.format(span=span, dim=dim))[0]["random"]
+ coeff += rand
+
+ if warm_start:
+ coeff, x_means, x_stds = _validate_warm_start(
+ source_table, output_table, summary_table, independent_varname,
+ original_dependent_varname, layer_sizes, optimizer_params,
+ is_classification, weights, warm_start, activation)
+ plpy.execute("DROP TABLE IF EXISTS {0}".format(output_table))
+ plpy.execute("DROP TABLE IF EXISTS {0}".format(summary_table))
+ best_state = []
+ best_loss = float('inf')
+ prev_loss = float('inf')
+ loss = None
+ for _ in range(n_tries):
while True:
if prev_state:
prev_state_str = py_list_to_sql_string(
prev_state, array_type="double precision")
else:
prev_state_str = "(NULL)::DOUBLE PRECISION[]"
+ # There is no branch for "constant" below: the step size keeps its initial value
+ zero_indexed_iteration = current_iteration - 1
+ if learning_rate_policy == "exp":
+ step_size = step_size_init * gamma**zero_indexed_iteration
+ elif learning_rate_policy == "inv":
+ step_size = step_size_init * (current_iteration)**(-power)
+ elif learning_rate_policy == "step":
+ step_size = step_size_init * gamma**(
+ math.floor(zero_indexed_iteration / iterations_per_step))
+
train_sql = """
SELECT
+ (result).state as state,
+ (result).loss as loss
+ FROM (
+ SELECT
{schema_madlib}.mlp_igd_step(
({independent_varname})::DOUBLE PRECISION[],
({dependent_varname})::DOUBLE PRECISION[],
@@ -122,105 +192,153 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
{layer_sizes},
({step_size})::FLOAT8,
{activation},
- {is_classification}) as curr_state
- FROM {source_table} AS _src
- """.format(schema_madlib=schema_madlib,
- independent_varname=independent_varname,
- dependent_varname=dependent_varname,
- prev_state=prev_state_str,
- # C++ uses double internally
- layer_sizes=py_list_to_sql_string(layer_sizes,
- array_type="double precision"),
- step_size=step_size,
- source_table=source_table,
- activation=activation_index,
- is_classification=int(is_classification))
- curr_state = plpy.execute(train_sql)[0]["curr_state"]
- dist_sql = """
- SELECT {schema_madlib}.internal_mlp_igd_distance(
- {prev_state},
- {curr_state}) as state_dist
- """.format(schema_madlib=schema_madlib,
- prev_state=prev_state_str,
- curr_state=py_list_to_sql_string(curr_state, "double precision"))
- state_dist = plpy.execute(dist_sql)[0]["state_dist"]
- if ((state_dist and state_dist < tolerance) or
- current_iteration > n_iterations):
+ {is_classification},
+ ({weights})::DOUBLE PRECISION,
+ {warm_start},
+ ({warm_start_coeff})::DOUBLE PRECISION[],
+ {n_tuples},
+ {lmbda},
+ {x_means},
+ {x_stds}
+ ) as result
+ FROM {source_table} as _src) _step_q
+ """.format(
+ schema_madlib=schema_madlib,
+ independent_varname=independent_varname,
+ dependent_varname=dependent_varname,
+ prev_state=prev_state_str,
+ # c++ uses double internally
+ layer_sizes=py_list_to_sql_string(
+ layer_sizes, array_type="DOUBLE PRECISION"),
+ step_size=step_size,
+ source_table=source_table,
+ activation=activation_index,
+ is_classification=int(is_classification),
+ weights=weights,
+ warm_start=warm_start,
+ warm_start_coeff=py_list_to_sql_string(
+ coeff, array_type="DOUBLE PRECISION"),
+ n_tuples=n_tuples,
+ lmbda=lmbda,
+ x_means=x_means,
+ x_stds=x_stds)
+ step_result = plpy.execute(train_sql)[0]
+ curr_state = step_result['state']
+ loss = step_result['loss']
+ if verbose and 1 < current_iteration <= n_iterations:
+ plpy.info("Iteration: " + str(current_iteration -
+ 1) + ", Loss: " + str(loss))
+ state_dist = abs(loss-prev_loss)
+ if ((state_dist and state_dist < tolerance)
+ or current_iteration > n_iterations):
break
prev_state = curr_state
+ prev_loss = loss
current_iteration += 1
- _build_model_table(schema_madlib, output_table,
- curr_state, n_iterations)
- layer_sizes_str = py_list_to_sql_string(
- layer_sizes, array_type="integer")
- classes_str = py_list_to_sql_string(
- [strip_end_quotes(cl, "'") for cl in classes],
- array_type=dependent_type)
- summary_table_creation_query = """
- CREATE TABLE {summary_table}(
- source_table TEXT,
- independent_varname TEXT,
- dependent_varname TEXT,
- tolerance FLOAT,
- step_size FLOAT,
- n_iterations INTEGER,
- n_tries INTEGER,
- layer_sizes INTEGER[],
- activation_function TEXT,
- is_classification BOOLEAN,
- classes {dependent_type}[]
- )""".format(summary_table=summary_table,
- dependent_type=dependent_type)
-
- summary_table_update_query = """
- INSERT INTO {summary_table} VALUES(
- '{source_table}',
- '{independent_varname}',
- '{original_dependent_varname}',
- {tolerance},
- {step_size},
- {n_iterations},
- {n_tries},
- {layer_sizes_str},
- '{activation_name}',
- {is_classification},
- {classes_str}
- )
- """.format(**locals())
- plpy.execute(summary_table_creation_query)
- plpy.execute(summary_table_update_query)
-# ----------------------------------------------------------------------
-
-
-def _build_model_table(schema_madlib, output_table, final_state, n_iterations):
+ # We use previous state because the last iteration
+ # just calculates loss
+ if loss < best_loss:
+ best_state = prev_state
+ best_loss = loss
+ current_iteration = 1
+ prev_state = None
+ _build_model_table(schema_madlib, output_table, best_state,
+ best_loss, n_iterations)
+ layer_sizes_str = py_list_to_sql_string(
+ layer_sizes, array_type="integer")
+ classes_str = py_list_to_sql_string(
+ [strip_end_quotes(cl, "'") for cl in classes],
+ array_type=dependent_type)
+ summary_table_creation_query = """
+ CREATE TABLE {summary_table}(
+ source_table TEXT,
+ independent_varname TEXT,
+ dependent_varname TEXT,
+ tolerance FLOAT,
+ learning_rate_init FLOAT,
+ learning_rate_policy TEXT,
+ n_iterations INTEGER,
+ n_tries INTEGER,
+ layer_sizes INTEGER[],
+ activation TEXT,
+ is_classification BOOLEAN,
+ classes {dependent_type}[],
+ weights VARCHAR,
+ x_means DOUBLE PRECISION[],
+ x_stds DOUBLE PRECISION[]
+ )""".format(summary_table=summary_table,
+ dependent_type=dependent_type)
+
+ summary_table_update_query = """
+ INSERT INTO {summary_table} VALUES(
+ '{source_table}',
+ '{independent_varname}',
+ '{original_dependent_varname}',
+ {tolerance},
+ {step_size_init},
+ '{learning_rate_policy}',
+ {n_iterations},
+ {n_tries},
+ {layer_sizes_str},
+ '{activation}',
+ {is_classification},
+ {classes_str},
+ '{weights}',
+ {x_means},
+ {x_stds}
+ )
+ """.format(**locals())
+ plpy.execute(summary_table_creation_query)
+ plpy.execute(summary_table_update_query)
+ return None
+
+
+def _get_loss(schema_madlib, state):
+ return plpy.execute("""
+ SELECT
+ (result).loss AS loss
+ FROM (
+ SELECT
+ {schema_madlib}.internal_mlp_igd_result(
+ {final_state_str}
+ ) AS result
+ ) rel_state_subq
+ """.format(
+ schema_madlib=schema_madlib,
+ final_state_str=py_list_to_sql_string(state)))[0]["loss"]
+
+
+def _build_model_table(schema_madlib, output_table, final_state, loss, n_iterations):
final_state_str = py_list_to_sql_string(
final_state, array_type="double precision")
model_table_query = """
- CREATE TABLE {output_table} AS
+ CREATE TABLE {output_table} AS
+ SELECT
+ (result).coeff as coeff,
+ {loss} as loss,
+ {n_iterations} as num_iterations
+ FROM (
SELECT
- (result).coeff AS coeff,
- (result).loss AS loss,
- {n_iterations} AS num_iterations
- -- (result).num_rows_processed AS num_rows_processed,
- -- n_tuples_including_nulls - (result).num_rows_processed
- FROM (
- SELECT
- {schema_madlib}.internal_mlp_igd_result(
- {final_state_str}
- ) AS result
- ) rel_state_subq
- """.format(**locals())
+ {schema_madlib}.internal_mlp_igd_result(
+ {final_state_str}
+ ) AS result
+ ) rel_state_subq
+ """.format(**locals())
plpy.execute(model_table_query)
-# ----------------------------------------------------------------------
def _get_optimizer_params(param_str):
params_defaults = {
- "step_size": (0.001, float),
+ "learning_rate_init": (0.001, float),
"n_iterations": (100, int),
"n_tries": (1, int),
"tolerance": (0.001, float),
+ "learning_rate_policy": ("constant", str),
+ "gamma": (0.1, float),
+ "iterations_per_step": (100, int),
+ "power": (0.5, float),
+ "lambda": (0, float)
}
param_defaults = dict([(k, v[0]) for k, v in params_defaults.items()])
param_types = dict([(k, v[1]) for k, v in params_defaults.items()])
@@ -228,10 +346,9 @@ def _get_optimizer_params(param_str):
if not param_str:
return param_defaults
- name_value = extract_keyvalue_params(param_str, param_types, param_defaults,
- ignore_invalid=False)
+ name_value = extract_keyvalue_params(
+ param_str, param_types, param_defaults, ignore_invalid=False)
return name_value
-# ----------------------------------------------------------------------
def _validate_args_classification(source_table, dependent_varname):
@@ -239,89 +356,174 @@ def _validate_args_classification(source_table, dependent_varname):
int_types = ['integer', 'smallint', 'bigint']
text_types = ['text', 'varchar', 'character varying', 'char', 'character']
boolean_types = ['boolean']
- _assert("[]" in expr_type or expr_type in int_types + text_types + boolean_types,
+ _assert("[]" in expr_type
+ or expr_type in int_types + text_types + boolean_types,
"Dependent variable column should refer to an "
"integer, boolean, text, varchar, or character type.")
-# ----------------------------------------------------------------------
def _validate_args_regression(source_table, dependent_varname):
expr_type = get_expr_type(dependent_varname, source_table)
int_types = ['integer', 'smallint', 'bigint']
float_types = ['double precision', 'real']
- _assert("[]" in expr_type or expr_type in int_types + float_types,
- "Dependent variable column should refer to an array or numeric type")
+ _assert(
+ "[]" in expr_type or expr_type in int_types + float_types,
+ "Dependent variable column should refer to an array or numeric type")
if "[]" in expr_type:
- _assert(array_col_has_same_dimension(source_table, dependent_varname),
- "Dependent variable column should refer to arrays of the same length")
-# ----------------------------------------------------------------------
+ _assert(
+ array_col_has_same_dimension(source_table, dependent_varname),
+ "Dependent variable column should refer to arrays of the same length"
+ )
+
+
+def _validate_summary_table(summary_table):
+ input_tbl_valid(summary_table, 'MLP')
+ cols_in_tbl_valid(summary_table, [
+ 'dependent_varname', 'independent_varname', 'activation',
+ 'tolerance', 'learning_rate_init', 'n_iterations', 'n_tries',
+ 'classes', 'layer_sizes', 'source_table', 'x_means', 'x_stds'
+ ], 'MLP')
+
+
+def _validate_warm_start(source_table, output_table, summary_table, independent_varname,
+ dependent_varname, layer_sizes,
+ optimizer_params, is_classification, weights,
+ warm_start, activation):
+ _assert(table_exists(output_table),
+ "MLP error: Warm start failed due to missing model table: " + output_table)
+ _assert(table_exists(summary_table),
+ "MLP error: Warm start failed due to missing summary table: " + summary_table)
+
+ _assert(optimizer_params["n_tries"] == 1,
+ "MLP error: warm_start is only compatible for n_tries = 1")
+
+ summary = plpy.execute("SELECT * FROM {0}".format(summary_table))[0]
+ params = [
+ "independent_varname", "dependent_varname", "layer_sizes",
+ "is_classification", "weights", "activation"
+ ]
+ for param in params:
+ _assert_equal(eval(param), summary[param],
+ "MLP error: warm start failed due to different parameter value: " +
+ param)
+ output = plpy.execute("SELECT * FROM {0}".format(output_table))[0]
+ coeff = output['coeff']
+ num_coeffs = sum(
+ map(lambda i: (layer_sizes[i] + 1) * (layer_sizes[i + 1]),
+ range(len(layer_sizes) - 1)))
+ _assert_equal(num_coeffs,
+ len(coeff),
+ "MLP error: Warm start failed to invalid output_table: " +
+ output_table + ". Invalid number of coefficients in model.")
+ x_means = py_list_to_sql_string(
+ summary["x_means"], array_type="DOUBLE PRECISION")
+ x_stds = py_list_to_sql_string(
+ summary["x_stds"], array_type="DOUBLE PRECISION")
+
+ return coeff, x_means, x_stds
def _validate_args(source_table, output_table, summary_table, independent_varname,
dependent_varname, hidden_layer_sizes,
- optimizer_params, is_classification):
+ optimizer_params, is_classification, weights, warm_start, activation):
input_tbl_valid(source_table, "MLP")
- output_tbl_valid(output_table, "MLP")
- output_tbl_valid(summary_table, "MLP")
- _assert(is_var_valid(source_table, independent_varname),
- "MLP error: invalid independent_varname "
- "('{independent_varname}') for source_table "
- "({source_table})!".format(independent_varname=independent_varname,
- source_table=source_table))
-
- _assert(is_var_valid(source_table, dependent_varname),
- "MLP error: invalid dependent_varname "
- "('{dependent_varname}') for source_table "
- "({source_table})!".format(dependent_varname=dependent_varname,
- source_table=source_table))
- _assert(hidden_layer_sizes is not None,
- "hidden_layer_sizes may not be null")
- _assert(isinstance(hidden_layer_sizes, list),
- "hidden_layer_sizes must be an array of integers")
- _assert(all(isinstance(value, int) for value in hidden_layer_sizes),
- "MLP error: Hidden layers sizes must be integers")
- _assert(all(value >= 0 for value in hidden_layer_sizes),
- "MLP error: Hidden layers sizes must be greater than 0.")
+ if not warm_start:
+ output_tbl_valid(output_table, "MLP")
+ output_tbl_valid(summary_table, "MLP")
+
+ _assert(
+ is_var_valid(source_table, independent_varname),
+ "MLP error: invalid independent_varname "
+ "('{independent_varname}') for source_table "
+ "({source_table})!".format(
+ independent_varname=independent_varname,
+ source_table=source_table))
+
+ _assert(
+ is_var_valid(source_table, dependent_varname),
+ "MLP error: invalid dependent_varname "
+ "('{dependent_varname}') for source_table "
+ "({source_table})!".format(
+ dependent_varname=dependent_varname, source_table=source_table))
+ _assert(
+ isinstance(hidden_layer_sizes, list),
+ "hidden_layer_sizes must be an array of integers")
+ # TODO put this check earlier
+ _assert(
+ all(isinstance(value, int) for value in hidden_layer_sizes),
+ "MLP error: Hidden layers sizes must be integers")
+ _assert(
+ all(value >= 0 for value in hidden_layer_sizes),
+ "MLP error: Hidden layers sizes must be greater than 0.")
+ _assert(optimizer_params["lambda"] >= 0,
+ "MLP error: lambda should be greater than or equal to 0.")
_assert(optimizer_params["tolerance"] >= 0,
- "MLP error: Tolerance should be greater than or equal to 0.")
+ "MLP error: tolerance should be greater than or equal to 0.")
_assert(optimizer_params["n_tries"] >= 1,
- "MLP error: Number of tries should be greater than or equal to 1")
- _assert(optimizer_params["n_iterations"] >= 1,
- "MLP error: Number of iterations should be greater than or equal to 1")
- _assert(optimizer_params["step_size"] > 0,
- "MLP error: Stepsize should be greater than 0.")
+ "MLP error: n_tries should be greater than or equal to 1")
+ _assert(
+ optimizer_params["n_iterations"] >= 1,
+ "MLP error: n_iterations should be greater than or equal to 1")
+ _assert(optimizer_params["power"] > 0,
+ "MLP error: power should be greater than 0.")
+ _assert(0 < optimizer_params["gamma"] <= 1,
+ "MLP error: gamma should be between 0 and 1.")
+ _assert(optimizer_params["iterations_per_step"] > 0,
+ "MLP error: iterations_per_step should be greater than 0.")
+ _assert(optimizer_params["learning_rate_init"] > 0,
+ "MLP error: learning_rate_init should be greater than 0.")
_assert("[]" in get_expr_type(independent_varname, source_table),
"Independent variable column should refer to an array")
- _assert(array_col_has_same_dimension(source_table, independent_varname),
- "Independent variable column should refer to arrays of the same length")
+ _assert(
+ array_col_has_same_dimension(source_table, independent_varname),
+ "Independent variable column should refer to arrays of the same length"
+ )
+
+ int_types = ['integer', 'smallint', 'bigint']
+ float_types = ['double precision', 'real']
+ _assert(
+ get_expr_type(weights, source_table) in int_types + float_types,
+ "MLP error: Weights should be a numeric type")
if is_classification:
_validate_args_classification(source_table, dependent_varname)
else:
_validate_args_regression(source_table, dependent_varname)
-# ----------------------------------------------------------------------
-def _get_activation_function_name(activation_function):
- if not activation_function:
- activation_function = 'sigmoid'
+def _get_learning_rate_policy_name(learning_rate_policy):
+ if not learning_rate_policy:
+ learning_rate_policy = 'constant'
+ else:
+ supported_learning_rate_policies = ['constant', 'exp', 'inv', 'step']
+ try:
+ learning_rate_policy = next(
+ x for x in supported_learning_rate_policies
+ if x.startswith(learning_rate_policy))
+ except StopIteration:
+ plpy.error(
+ "MLP Error: Invalid learning rate policy: "
+ "{0}. Supported learning rate policies are ({1})".format(
+ learning_rate_policy,
+ ','.join(sorted(supported_learning_rate_policies))))
+ return learning_rate_policy
+
+
+def _get_activation_function_name(activation):
+ if not activation:
+ activation = 'sigmoid'
else:
- # Add non-linear kernels below after implementing them.
supported_activation_function = ['sigmoid', 'tanh', 'relu']
try:
- # allow user to specify a prefix substring of
- # supported kernels. This works because the supported
- # kernels have unique prefixes.
- activation_function = next(x for x in supported_activation_function
- if x.startswith(activation_function))
+ activation = next(
+ x for x in supported_activation_function
+ if x.startswith(activation))
except StopIteration:
- # next() returns a StopIteration if no element found
plpy.error("MLP Error: Invalid activation function: "
- "{0}. Supported activation functions are ({1})"
- .format(activation_function, ','.join(
- sorted(supported_activation_function))))
- return activation_function
-# ------------------------------------------------------------------------------
+ "{0}. Supported activation functions are ({1})".format(
+ activation,
+ ','.join(sorted(supported_activation_function))))
+ return activation
def _get_activation_index(activation_name):
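
For reference, the prefix matching used by _get_activation_function_name and
_get_learning_rate_policy_name above reduces to the small sketch below. It is a
standalone illustration (resolve_prefix is a hypothetical name; the real helpers
report failures through plpy.error instead of returning None), and it is
unambiguous here because each supported name starts with a distinct letter.

    def resolve_prefix(prefix, supported):
        # Return the first supported name starting with prefix, or None.
        try:
            return next(x for x in supported if x.startswith(prefix))
        except StopIteration:
            return None

    # resolve_prefix('sig', ['sigmoid', 'tanh', 'relu'])        -> 'sigmoid'
    # resolve_prefix('e', ['constant', 'exp', 'inv', 'step'])   -> 'exp'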
@@ -333,12 +535,15 @@ def _format_label(label):
if isinstance(label, str):
return "'" + label + "'"
return label
-# -------------------------------------------------------------------------
-def mlp_predict(schema_madlib, model_table, data_table,
- id_col_name, output_table,
- pred_type='response', **kwargs):
+def mlp_predict(schema_madlib,
+ model_table,
+ data_table,
+ id_col_name,
+ output_table,
+ pred_type='response',
+ **kwargs):
""" Score new observations using a trained neural network
@param schema_madlib Name of the schema where MADlib is installed
@@ -356,13 +561,7 @@ def mlp_predict(schema_madlib, model_table, data_table,
input_tbl_valid(model_table, 'MLP')
cols_in_tbl_valid(model_table, ['coeff'], 'MLP')
summary_table = add_postfix(model_table, "_summary")
- input_tbl_valid(summary_table, 'MLP')
- cols_in_tbl_valid(summary_table,
- ['dependent_varname', 'independent_varname',
- 'activation_function',
- 'tolerance', 'step_size', 'n_iterations',
- 'n_tries', 'classes', 'layer_sizes', 'source_table'],
- 'MLP')
+ _validate_summary_table(summary_table)
summary = plpy.execute("SELECT * FROM {0}".format(summary_table))[0]
coeff = py_list_to_sql_string(plpy.execute(
@@ -370,106 +569,116 @@ def mlp_predict(schema_madlib, model_table, data_table,
dependent_varname = summary['dependent_varname']
independent_varname = summary['independent_varname']
source_table = summary['source_table']
- activation_function = _get_activation_index(summary['activation_function'])
+ activation = _get_activation_index(summary['activation'])
layer_sizes = py_list_to_sql_string(
summary['layer_sizes'], array_type="DOUBLE PRECISION")
is_classification = int(summary["is_classification"])
is_response = int(pred_type == 'response')
+ x_means = py_list_to_sql_string(
+ summary["x_means"], array_type="DOUBLE PRECISION")
+ x_stds = py_list_to_sql_string(
+ summary["x_stds"], array_type="DOUBLE PRECISION")
- pred_name = ('"prob_{0}"' if pred_type == "prob" else
- '"estimated_{0}"').format(dependent_varname.replace('"', '').strip())
+ pred_name = (
+ '"prob_{0}"' if pred_type == "prob" else
+ '"estimated_{0}"').format(dependent_varname.replace('"', '').strip())
input_tbl_valid(data_table, 'MLP')
- _assert(is_var_valid(data_table, independent_varname),
- "MLP Error: independent_varname ('{0}') is invalid for data_table ({1})".
- format(independent_varname, data_table))
+ _assert(
+ is_var_valid(data_table, independent_varname),
+ "MLP Error: independent_varname ('{0}') is invalid for data_table ({1})".
+ format(independent_varname, data_table))
_assert(id_col_name is not None, "MLP Error: id_col_name is NULL")
- _assert(is_var_valid(data_table, id_col_name),
- "MLP Error: id_col_name ('{0}') is invalid for {1}".
- format(id_col_name, data_table))
+ _assert(
+ is_var_valid(data_table, id_col_name),
+ "MLP Error: id_col_name ('{0}') is invalid for {1}".format(
+ id_col_name, data_table))
output_tbl_valid(output_table, 'MLP')
- with MinWarning("warning"):
- header = "CREATE TABLE " + output_table + " AS "
- # Regression
- if not is_classification:
- dependent_type = get_expr_type(dependent_varname, source_table)
- unnest_if_not_array = ""
- # Return the same type as the user provided. Internally we always use an array, but
- # if they provided a scaler, unnest it for the user
- if "[]" not in dependent_type:
- unnest_if_not_array = "UNNEST"
+ header = "CREATE TABLE " + output_table + " AS "
+ # Regression
+ if not is_classification:
+ dependent_type = get_expr_type(dependent_varname, source_table)
+ unnest_if_not_array = ""
+ # Return the same type as the user provided. Internally we always
+ # use an array, but if they provided a scalar, unnest it for
+ # the user
+ if "[]" not in dependent_type:
+ unnest_if_not_array = "UNNEST"
+ sql = header + """
+ SELECT {id_col_name},
+ {unnest_if_not_array}({schema_madlib}.internal_predict_mlp(
+ {coeff},
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )) as {pred_name}
+ FROM {data_table}
+ """
+ else:
+ summary_query = """
+ SELECT classes FROM {0}
+ """.format(summary_table)
+ classes = plpy.execute(summary_query)[0]['classes']
+ if pred_type == "response":
+ classes_with_index_table = unique_string()
+ classes_table = unique_string()
sql = header + """
- SELECT {id_col_name},
- {unnest_if_not_array}({schema_madlib}.internal_predict_mlp(
- {coeff},
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )) as {pred_name}
- FROM {data_table}
+ SELECT
+ q.{id_col_name}
+ ,(ARRAY{classes})[pred_idx[1]+1] as {pred_name}
+ FROM (
+ SELECT
+ {id_col_name},
+ {schema_madlib}.internal_predict_mlp(
+ {coeff}::DOUBLE PRECISION[],
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )
+ as pred_idx
+ FROM {data_table}
+ ) q
"""
else:
- summary_query = """
- SELECT classes FROM {0}
- """.format(summary_table)
- classes = plpy.execute(summary_query)[0]['classes']
- if pred_type == "response":
- # This join is to recover the class name from the summary table,
- # as prediction just returns an index
- classes_with_index_table = unique_string()
- classes_table = unique_string()
- sql = header + """
- SELECT
- q.{id_col_name}
- ,(ARRAY{classes})[pred_idx[1]+1] as {pred_name}
- FROM (
- SELECT
- {id_col_name},
- {schema_madlib}.internal_predict_mlp(
- {coeff}::DOUBLE PRECISION[],
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )
- as pred_idx
- FROM {data_table}
- ) q
- """
- else:
- # Incomplete
- intermediate_col = unique_string()
- score_format = ',\n'.join([
- 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
- format(j=i + 1, c_str=str(c).strip(' "'),
- interim=intermediate_col)
- for i, c in enumerate(classes)])
- sql = header + """
- SELECT
- {id_col_name},
- {score_format}
- FROM (
- SELECT {id_col_name},
- {schema_madlib}.internal_predict_mlp(
- {coeff}::DOUBLE PRECISION[],
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )::TEXT[]
- AS {intermediate_col}
- FROM {data_table}
- ) q
- """
+ # Incomplete
+ intermediate_col = unique_string()
+ score_format = ',\n'.join([
+ 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
+ format(j=i + 1, c_str=str(c).strip(' "'),
+ interim=intermediate_col)
+ for i, c in enumerate(classes)])
+ sql = header + """
+ SELECT
+ {id_col_name},
+ {score_format}
+ FROM (
+ SELECT {id_col_name},
+ {schema_madlib}.internal_predict_mlp(
+ {coeff}::DOUBLE PRECISION[],
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )::TEXT[]
+ AS {intermediate_col}
+ FROM {data_table}
+ ) q
+ """
sql = sql.format(**locals())
plpy.execute(sql)
-# ----------------------------------------------------------------------
def mlp_help(schema_madlib, message, is_classification):
@@ -511,34 +720,44 @@ def mlp_help(schema_madlib, message, is_classification):
USAGE
---------------------------------------------------------------------------
SELECT {schema_madlib}.{method}(
- source_table, -- name of input table
- output_table, -- name of output model table
- independent_varname, -- name of independent variable
- dependent_varname, -- {label_description}
- hidden_layer_sizes, -- Array of integers indicating the
+ source_table, -- TEXT. name of input table
+ output_table, -- TEXT. name of output model table
+ independent_varname, -- TEXT. name of independent variable
+ dependent_varname, -- TEXT. {label_description}
+ hidden_layer_sizes, -- INTEGER[]. Array of integers indicating the
number of hidden units per layer.
Length equal to the number of hidden layers.
- optimizer_params, -- optional, default NULL
+ optimizer_params, -- TEXT. optional, default NULL
parameters for optimization in
a comma-separated string of key-value pairs.
+ To find out more:
+
+ SELECT {schema_madlib}.{method}('optimizer_params')
- step_size DOUBLE PRECISION, -- Default: 0.001
- Learning rate
- n_iterations INTEGER, -- Default: 100
- Number of iterations per try
- n_tries INTEGER, -- Default: 1
- Total number of training cycles,
- with random initializations to avoid
- local minima.
- tolerance DOUBLE PRECISION, -- Default: 0.001
- If the distance in loss between
- two iterations is less than the
- tolerance training will stop, even if
- n_iterations has not been reached
-
- activation -- optional, default: 'sigmoid'.
+ activation -- TEXT. optional, default: 'sigmoid'.
supported activations: 'relu', 'sigmoid',
and 'tanh'
+
+ weights -- TEXT. optional, default: NULL.
+ Weights for input rows. Column name which
+ specifies the weight for each input row.
+ This weight will be incorporated into the
+ update during SGD, and will not be used
+ for loss calculations. If not specified,
+ weight for each row will default to 1.
+ Column should be a numeric type.
+
+ warm_start -- BOOLEAN. optional, default: FALSE.
+ Initialize coefficients with those from
+ the previous run. If TRUE, coefficients
+ are read from output_table. Note that all
+ parameters other than optimizer_params
+ and verbose must remain constant between
+ calls that use warm_start.
+
+ verbose -- BOOLEAN. optional, default: FALSE
+ Provides verbose output of the results of
+ training.
);
@@ -576,22 +795,29 @@ def mlp_help(schema_madlib, message, is_classification):
{1,0.09378,12.50,7.870,0,0.5240,5.8890,39.00,5.4509,5,311.0,15.20,390.50,15.71} | 1 | 21.70
\.
- - Generate a multilayer perception with a two hidden layers of 5 units
+ - Generate a multilayer perceptron with two hidden layers of 25 units
each. Use the x column as the independent variables, and use the class
- column as the classification. Set the tolerance to 0 so that 300
+ column as the classification. Set the tolerance to 0 so that 500
iterations will be run. Use a rectified linear (relu) activation function.
The model will be written to mlp_regress_result.
- SELECT mlp_regression(
- 'lin_housing_wi', -- Source table
- 'mlp_regress_result', -- Desination table
- 'x', -- Independent variable
- 'y', -- Dependent variable
- ARRAY[5,5], -- Number of hidden units per layer
- 'step_size=0.007,
- n_iterations=300,
+ DROP TABLE IF EXISTS mlp_regress;
+ DROP TABLE IF EXISTS mlp_regress_summary;
+ SELECT madlib.mlp_regression(
+ 'lin_housing', -- Source table
+ 'mlp_regress', -- Destination table
+ 'x', -- Input features
+ 'y', -- Dependent variable
+ ARRAY[25,25], -- Number of units per layer
+ 'learning_rate_init=0.001,
+ n_iterations=500,
+ lambda=0.001,
tolerance=0',
- 'sigmoid'); -- Activation
+ 'relu',
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+ );
"""
@@ -630,29 +856,78 @@ def mlp_help(schema_madlib, message, is_classification):
-- Generate a multilayer perceptron with a single hidden layer of 5 units.
Use the attributes column as the independent variables, and use the class
- column as the classification. Set the tolerance to 0 so that 1000
+ column as the classification. Set the tolerance to 0 so that 500
iterations will be run. Use a hyperbolic tangent activation function.
- The model will be written to mlp_result.
+ The model will be written to mlp_model.
- SELECT {schema_madlib}.mlp_classification(
+ DROP TABLE IF EXISTS mlp_model;
+ DROP TABLE IF EXISTS mlp_model_summary;
+ SELECT madlib.mlp_classification(
'iris_data', -- Source table
'mlp_model', -- Destination table
'attributes', -- Input features
'class_text', -- Label
ARRAY[5], -- Number of units per layer
- 'step_size=0.003,
- n_iterations=5000,
+ 'learning_rate_init=0.003,
+ n_iterations=500,
tolerance=0', -- Optimizer params
- 'tanh'); -- Activation function
+ 'tanh', -- Activation function
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+ );
+
+ SELECT * FROM mlp_model;
""".format(**args)
example = classification_example if is_classification else regression_example
+ optimizer_params = """
+ ------------------------------------------------------------------------------------------------
+ OPTIMIZER PARAMS
+ ------------------------------------------------------------------------------------------------
+ learning_rate_init DOUBLE PRECISION, -- Default: 0.001
+ Initial learning rate
+ learning_rate_policy VARCHAR, -- Default: 'constant'
+ One of 'constant','exp','inv','step'
+ 'constant': learning_rate =
+ learning_rate_init
+ 'exp': learning_rate =
+ learning_rate_init * gamma^(iter)
+ 'inv': learning_rate =
+ learning_rate_init * (iter+1)^(-power)
+ 'step': learning_rate =
+ learning_rate_init * gamma^(floor(iter/iterations_per_step))
+ Where iter is the current iteration of SGD.
+ gamma DOUBLE PRECISION, -- Default: '0.1'
+ Decay rate for learning rate.
+ Valid for learning_rate_policy = 'exp', or 'step'
+ power DOUBLE PRECISION, -- Default: '0.5'
+ Exponent for learning_rate_policy = 'inv'
+ iterations_per_step INTEGER, -- Default: '100'
+ Number of iterations to run before decreasing the learning
+ rate by a factor of gamma. Valid for learning rate
+ policy = 'step'
+ n_iterations INTEGER, -- Default: 100
+ Number of iterations per try
+ n_tries INTEGER, -- Default: 1
+ Total number of training cycles,
+ with random initializations to avoid
+ local minima.
+ tolerance DOUBLE PRECISION, -- Default: 0.001
+ If the change in loss between
+ two iterations is less than the
+ tolerance, training will stop, even if
+ n_iterations has not been reached.
+ """.format(**args)
+
if not message:
return summary
elif message.lower() in ('usage', 'help', '?'):
return usage
elif message.lower() == 'example':
return example
+ elif message.lower() == 'optimizer_params':
+ return optimizer_params
return """
No such option. Use "SELECT {schema_madlib}.{method}()" for help.
""".format(**args)
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/b7fdb804/src/ports/postgres/modules/convex/test/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in
index 97541a9..2302252 100644
--- a/src/ports/postgres/modules/convex/test/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/test/mlp.sql_in
@@ -28,7 +28,7 @@
-- Classification
-SELECT setseed(0.5);
+SELECT setseed(0.6);
DROP TABLE IF EXISTS iris_data, iris_test, mlp_class, mlp_class_summary CASCADE;
CREATE TABLE iris_data(
id integer,
@@ -191,21 +191,27 @@ INSERT INTO iris_data VALUES
SELECT mlp_classification(
- 'iris_data', -- Source table
+ 'iris_data', -- Source table
'mlp_class', -- Destination table
- 'attributes', -- Input features
- 'class', -- Label
- ARRAY[5], -- Number of units per layer
- 'step_size=0.001,
- n_iterations=1000,
+ 'attributes', -- Input features
+ 'class', -- Label
+ ARRAY[5], -- Number of units per layer
+ 'learning_rate_init=0.1,
+ learning_rate_policy=constant,
+ n_iterations=800,
+ n_tries=2,
tolerance=0',
- 'tanh');
+ 'sigmoid',
+ '',
+ FALSE,
+ TRUE
+);
SELECT assert(
-- Loss will improve much more if more iterations are run
- loss < 30,
- 'MLP: Loss is too high (> 30). Wrong result.'
+ loss < 0.1,
+ 'MLP: Loss is too high (> 0.1). Wrong result.'
) FROM mlp_class;
DROP TABLE IF EXISTS mlp_prediction;
@@ -239,9 +245,8 @@ SELECT mlp_predict(
'mlp_prediction',
'response');
-select * from mlp_prediction;
+SELECT * FROM mlp_prediction;
SELECT assert(
- -- Accuracy greater than 90%
COUNT(*)/150.0 > 0.95,
'MLP: Accuracy is too low (< 95%). Wrong result.'
) FROM
@@ -766,65 +771,30 @@ COPY lin_housing_wi (x, grp_by_col, y) FROM STDIN NULL '?' DELIMITER '|';
{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88} | 2 | 11.90
\.
--- Normalize the columns
-CREATE TEMPORARY TABLE maxs as(
- SELECT
- max(x[1]) m1,
- max(x[2]) m2,
- max(x[3]) m3,
- max(x[4]) m4,
- max(x[5]) m5,
- max(x[6]) m6,
- max(x[7]) m7,
- max(x[8]) m8,
- max(x[9]) m9,
- max(x[10]) m10,
- max(x[11]) m11,
- max(x[12]) m12,
- max(x[13]) m13,
- max(x[14]) m14
- from lin_housing_wi
-);
-CREATE TABLE lin_housing_wi_scaled AS
-SELECT ARRAY[
- x[1]/(SELECT m1 from maxs),
- x[2]/(SELECT m2 from maxs),
- x[3]/(SELECT m3 from maxs),
- x[4]/(SELECT m4 from maxs),
- x[5]/(SELECT m5 from maxs),
- x[6]/(SELECT m6 from maxs),
- x[7]/(SELECT m7 from maxs),
- x[8]/(SELECT m8 from maxs),
- x[9]/(SELECT m9 from maxs),
- x[10]/(SELECT m10 from maxs),
- x[11]/(SELECT m11 from maxs),
- x[12]/(SELECT m12 from maxs),
- x[13]/(SELECT m13 from maxs),
- x[14]/(SELECT m14 from maxs)] as x,
- id,y
-FROM lin_housing_wi;
-
-DROP TABLE IF EXISTS maxs;
DROP TABLE IF EXISTS mlp_regress;
DROP TABLE IF EXISTS mlp_regress_summary;
SELECT setseed(0);
SELECT mlp_regression(
- 'lin_housing_wi_scaled', -- Source table
+ 'lin_housing_wi', -- Source table
'mlp_regress', -- Destination table
'x', -- Input features
'y', -- Dependent variable
- ARRAY[5,5], -- Number of units per layer
- 'step_size=0.005,
- n_iterations=800,
+ ARRAY[40], -- Number of units per layer
+ 'learning_rate_init=0.015,
+ learning_rate_policy=inv,
+ n_iterations=300,
tolerance=0',
- 'sigmoid');
+ 'sigmoid',
+ '',
+ False,
+ TRUE);
SELECT assert(
- loss < 10,
+ loss < 2,
'MLP: Loss is too high (> 2). Wrong result.'
) FROM mlp_regress;
@@ -832,14 +802,14 @@ SELECT assert(
DROP TABLE IF EXISTS mlp_prediction_regress;
SELECT mlp_predict(
'mlp_regress',
- 'lin_housing_wi_scaled',
+ 'lin_housing_wi',
'id',
'mlp_prediction_regress',
'output');
SELECT assert(
- 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi_scaled.y,2.0))/506 < 10.0,
+ 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi.y,2.0))/506 < 2.0,
'MLP: Predict MSE is too high (> 2). Wrong result'
)
-FROM mlp_prediction_regress JOIN lin_housing_wi_scaled
-ON mlp_prediction_regress.id = lin_housing_wi_scaled.id;
-DROP TABLE IF EXISTS lin_housing_wi_scaled;
+FROM mlp_prediction_regress JOIN lin_housing_wi
+ON mlp_prediction_regress.id = lin_housing_wi.id;
+DROP TABLE IF EXISTS lin_housing_wi;
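
The manual max-scaling removed above is no longer needed: training now
standardizes features internally, using the per-column means and standard
deviations computed in mlp_igd.py_in (standard deviations of 0 are clamped to 1,
mirroring the filtered_stds logic in that file). A rough standalone sketch of
that z-scoring, with standardize as a hypothetical name:

    def standardize(rows, means, stds):
        # Clamp zero stds to 1 so constant columns pass through unchanged.
        safe_stds = [s if s != 0 else 1 for s in stds]
        return [[(x - m) / s for x, m, s in zip(row, means, safe_stds)]
                for row in rows]

    # standardize([[1.0, 5.0], [3.0, 5.0]], means=[2.0, 5.0], stds=[1.0, 0.0])
    # -> [[-1.0, 0.0], [1.0, 0.0]]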
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/b7fdb804/src/ports/postgres/modules/utilities/utilities.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index b28a5f3..c1670b5 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -54,6 +54,18 @@ def is_orca():
# ------------------------------------------------------------------------------
+def _assert_equal(o1, o2, msg):
+ """
+ @brief if the given objects are not equal, then raise an error with the message
+ @param o1 the first object
+ @param o2 the second object
+ @param msg the error message to be reported
+ """
+ if o1 != o2:
+ plpy.error(msg)
+# ------------------------------------------------------------------------------
+
+
def _assert(condition, msg):
"""
@brief if the given condition is false, then raise an error with the message