You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2017/08/14 17:37:20 UTC
[1/2] incubator-madlib git commit: MLP: Add multiple enhancements
Repository: incubator-madlib
Updated Branches:
refs/heads/master 6f6f804b2 -> ff1b0f883
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 6cea7b0..550d630 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -16,7 +16,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
"""
@file mlp_igd.py_in
@@ -24,17 +23,18 @@
@namespace mlp_igd
"""
+import math
import plpy
-from utilities.control import MinWarning
from utilities.utilities import add_postfix
from utilities.utilities import py_list_to_sql_string
from utilities.utilities import extract_keyvalue_params
from utilities.utilities import _assert
+from utilities.utilities import _assert_equal
from utilities.utilities import unique_string
from utilities.utilities import strip_end_quotes
-
from utilities.validate_args import cols_in_tbl_valid
+from utilities.validate_args import table_exists
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import is_var_valid
from utilities.validate_args import output_tbl_valid
@@ -42,10 +42,14 @@ from utilities.validate_args import get_expr_type
from utilities.validate_args import array_col_has_same_dimension
from utilities.validate_args import array_col_dimension
+from convex.utils_regularization import __utils_ind_var_scales
+
+from elastic_net.elastic_net_utils import _tbl_dimension_rownum
+
def mlp(schema_madlib, source_table, output_table, independent_varname,
- dependent_varname, hidden_layer_sizes,
- optimizer_param_str, activation, is_classification, **kwargs):
+ dependent_varname, hidden_layer_sizes, optimizer_param_str, activation,
+ is_classification, weights, warm_start, verbose=False):
"""
Args:
@param schema_madlib
@@ -59,62 +63,128 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
Returns:
None
"""
- with MinWarning('warning'):
- optimizer_params = _get_optimizer_params(optimizer_param_str or "")
- summary_table = add_postfix(output_table, "_summary")
- _validate_args(source_table, output_table, summary_table, independent_varname,
- dependent_varname, hidden_layer_sizes,
- optimizer_params, is_classification)
-
- current_iteration = 1
- prev_state = None
- tolerance = optimizer_params["tolerance"]
- n_iterations = optimizer_params["n_iterations"]
- step_size = optimizer_params["step_size"]
- n_tries = optimizer_params["n_tries"]
- activation_name = _get_activation_function_name(activation)
- activation_index = _get_activation_index(activation_name)
- num_input_nodes = array_col_dimension(
- source_table, independent_varname)
- num_output_nodes = 0
- classes = []
- dependent_type = get_expr_type(dependent_varname, source_table)
- original_dependent_varname = dependent_varname
-
- if is_classification:
- dependent_variable_sql = """
- SELECT DISTINCT {dependent_varname}
- FROM {source_table}
- """.format(dependent_varname=dependent_varname,
- source_table=source_table)
- labels = plpy.execute(dependent_variable_sql)
- one_hot_dependent_varname = 'ARRAY['
- num_output_nodes = len(labels)
- for label_obj in labels:
- label = _format_label(label_obj[dependent_varname])
- classes.append(label)
- one_hot_dependent_varname += dependent_varname + \
- "=" + str(label) + ","
- # Remove the last comma
- one_hot_dependent_varname = one_hot_dependent_varname[:-1]
- one_hot_dependent_varname += ']::integer[]'
- dependent_varname = one_hot_dependent_varname
- else:
- if "[]" not in dependent_type:
- dependent_varname = "ARRAY[" + dependent_varname + "]"
- num_output_nodes = array_col_dimension(
- source_table, dependent_varname)
- layer_sizes = [num_input_nodes] + \
- hidden_layer_sizes + [num_output_nodes]
+ warm_start = bool(warm_start)
+ optimizer_params = _get_optimizer_params(optimizer_param_str or "")
+ summary_table = add_postfix(output_table, "_summary")
+ weights = '1' if not weights or not weights.strip() else weights.strip()
+ hidden_layer_sizes = hidden_layer_sizes or []
+ activation = _get_activation_function_name(activation)
+ learning_rate_policy = _get_learning_rate_policy_name(
+ optimizer_params["learning_rate_policy"])
+ activation_index = _get_activation_index(activation)
+
+ _validate_args(source_table, output_table, summary_table, independent_varname,
+ dependent_varname, hidden_layer_sizes,
+ optimizer_params, is_classification, weights,
+ warm_start, activation)
+
+ current_iteration = 1
+ prev_state = None
+ tolerance = optimizer_params["tolerance"]
+ n_iterations = optimizer_params["n_iterations"]
+ step_size_init = optimizer_params["learning_rate_init"]
+ iterations_per_step = optimizer_params["iterations_per_step"]
+ power = optimizer_params["power"]
+ gamma = optimizer_params["gamma"]
+ step_size = step_size_init
+ n_tries = optimizer_params["n_tries"]
+ # lambda is a reserved word in python
+ lmbda = optimizer_params["lambda"]
+ iterations_per_step = optimizer_params["iterations_per_step"]
+ num_input_nodes = array_col_dimension(source_table,
+ independent_varname)
+ num_output_nodes = 0
+ classes = []
+ dependent_type = get_expr_type(dependent_varname, source_table)
+ original_dependent_varname = dependent_varname
+ dimension, n_tuples = _tbl_dimension_rownum(
+ schema_madlib, source_table, independent_varname)
+ x_scales = __utils_ind_var_scales(
+ source_table, independent_varname, dimension, schema_madlib)
+ x_means = py_list_to_sql_string(
+ x_scales["mean"], array_type="DOUBLE PRECISION")
+ filtered_stds = [x if x != 0 else 1 for x in x_scales["std"]]
+ x_stds = py_list_to_sql_string(
+ filtered_stds, array_type="DOUBLE PRECISION")
+ if is_classification:
+ dependent_variable_sql = """
+ SELECT DISTINCT {dependent_varname}
+ FROM {source_table}
+ """.format(
+ dependent_varname=dependent_varname, source_table=source_table)
+ labels = plpy.execute(dependent_variable_sql)
+ one_hot_dependent_varname = 'ARRAY['
+ num_output_nodes = len(labels)
+ for label_obj in labels:
+ label = _format_label(label_obj[dependent_varname])
+ classes.append(label)
+ classes.sort()
+ for c in classes:
+ one_hot_dependent_varname += dependent_varname + \
+ "=" + str(c) + ","
+ # Remove the last comma
+ one_hot_dependent_varname = one_hot_dependent_varname[:-1]
+ one_hot_dependent_varname += ']::integer[]'
+ dependent_varname = one_hot_dependent_varname
+ else:
+ if "[]" not in dependent_type:
+ dependent_varname = "ARRAY[" + dependent_varname + "]"
+ num_output_nodes = array_col_dimension(
+ source_table, dependent_varname)
+ layer_sizes = [num_input_nodes] + \
+ hidden_layer_sizes + [num_output_nodes]
+
+ # Need layer sizes before validating for warm_start
+ coeff = []
+ for i in range(len(layer_sizes) - 1):
+ fan_in = layer_sizes[i]
+ fan_out = layer_sizes[i + 1]
+ # Initialize according to Glorot and Bengio (2010)
+ # See design doc for more info
+ span = math.sqrt(6.0 / (fan_in + fan_out))
+ dim = (layer_sizes[i] + 1) * layer_sizes[i + 1]
+ rand = plpy.execute("""SELECT array_agg({span}*(random()-0.5))
+ AS random
+ FROM generate_series(0,{dim})
+ """.format(span=span, dim=dim))[0]["random"]
+ coeff += rand
+
+ if warm_start:
+ coeff, x_means, x_stds = _validate_warm_start(
+ source_table, output_table, summary_table, independent_varname,
+ original_dependent_varname, layer_sizes, optimizer_params,
+ is_classification, weights, warm_start, activation)
+ plpy.execute("DROP TABLE IF EXISTS {0}".format(output_table))
+ plpy.execute("DROP TABLE IF EXISTS {0}".format(summary_table))
+ best_state = []
+ best_loss = [float('inf')]
+ prev_loss = float('inf')
+ loss = None
+ for _ in range(n_tries):
while True:
if prev_state:
prev_state_str = py_list_to_sql_string(
prev_state, array_type="double precision")
else:
prev_state_str = "(NULL)::DOUBLE PRECISION[]"
+ # The "constant" policy has no branch below, so step_size is left unchanged
+ zero_indexed_iteration = current_iteration - 1
+ if learning_rate_policy == "exp":
+ step_size = step_size_init * gamma**zero_indexed_iteration
+ elif learning_rate_policy == "inv":
+ step_size = step_size_init * (current_iteration)**(-power)
+ elif learning_rate_policy == "step":
+ step_size = step_size_init * gamma**(
+ math.floor(zero_indexed_iteration / iterations_per_step))
+
+
train_sql = """
SELECT
+ (result).state as state,
+ (result).loss as loss
+ FROM (
+ SELECT
{schema_madlib}.mlp_igd_step(
({independent_varname})::DOUBLE PRECISION[],
({dependent_varname})::DOUBLE PRECISION[],
@@ -122,105 +192,153 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
{layer_sizes},
({step_size})::FLOAT8,
{activation},
- {is_classification}) as curr_state
- FROM {source_table} AS _src
- """.format(schema_madlib=schema_madlib,
- independent_varname=independent_varname,
- dependent_varname=dependent_varname,
- prev_state=prev_state_str,
- # C++ uses double internally
- layer_sizes=py_list_to_sql_string(layer_sizes,
- array_type="double precision"),
- step_size=step_size,
- source_table=source_table,
- activation=activation_index,
- is_classification=int(is_classification))
- curr_state = plpy.execute(train_sql)[0]["curr_state"]
- dist_sql = """
- SELECT {schema_madlib}.internal_mlp_igd_distance(
- {prev_state},
- {curr_state}) as state_dist
- """.format(schema_madlib=schema_madlib,
- prev_state=prev_state_str,
- curr_state=py_list_to_sql_string(curr_state, "double precision"))
- state_dist = plpy.execute(dist_sql)[0]["state_dist"]
- if ((state_dist and state_dist < tolerance) or
- current_iteration > n_iterations):
+ {is_classification},
+ ({weights})::DOUBLE PRECISION,
+ {warm_start},
+ ({warm_start_coeff})::DOUBLE PRECISION[],
+ {n_tuples},
+ {lmbda},
+ {x_means},
+ {x_stds}
+ ) as result
+ FROM {source_table} as _src) _step_q
+ """.format(
+ schema_madlib=schema_madlib,
+ independent_varname=independent_varname,
+ dependent_varname=dependent_varname,
+ prev_state=prev_state_str,
+ # c++ uses double internally
+ layer_sizes=py_list_to_sql_string(
+ layer_sizes, array_type="DOUBLE PRECISION"),
+ step_size=step_size,
+ source_table=source_table,
+ activation=activation_index,
+ is_classification=int(is_classification),
+ weights=weights,
+ warm_start=warm_start,
+ warm_start_coeff=py_list_to_sql_string(
+ coeff, array_type="DOUBLE PRECISION"),
+ n_tuples=n_tuples,
+ lmbda=lmbda,
+ x_means=x_means,
+ x_stds=x_stds)
+ step_result = plpy.execute(train_sql)[0]
+ curr_state = step_result['state']
+ loss = step_result['loss']
+ if verbose and 1 < current_iteration <= n_iterations:
+ plpy.info("Iteration: " + str(current_iteration -
+ 1) + ", Loss: " + str(loss))
+ state_dist = abs(loss-prev_loss)
+ if ((state_dist and state_dist < tolerance)
+ or current_iteration > n_iterations):
break
prev_state = curr_state
+ prev_loss = loss
current_iteration += 1
- _build_model_table(schema_madlib, output_table,
- curr_state, n_iterations)
- layer_sizes_str = py_list_to_sql_string(
- layer_sizes, array_type="integer")
- classes_str = py_list_to_sql_string(
- [strip_end_quotes(cl, "'") for cl in classes],
- array_type=dependent_type)
- summary_table_creation_query = """
- CREATE TABLE {summary_table}(
- source_table TEXT,
- independent_varname TEXT,
- dependent_varname TEXT,
- tolerance FLOAT,
- step_size FLOAT,
- n_iterations INTEGER,
- n_tries INTEGER,
- layer_sizes INTEGER[],
- activation_function TEXT,
- is_classification BOOLEAN,
- classes {dependent_type}[]
- )""".format(summary_table=summary_table,
- dependent_type=dependent_type)
-
- summary_table_update_query = """
- INSERT INTO {summary_table} VALUES(
- '{source_table}',
- '{independent_varname}',
- '{original_dependent_varname}',
- {tolerance},
- {step_size},
- {n_iterations},
- {n_tries},
- {layer_sizes_str},
- '{activation_name}',
- {is_classification},
- {classes_str}
- )
- """.format(**locals())
- plpy.execute(summary_table_creation_query)
- plpy.execute(summary_table_update_query)
-# ----------------------------------------------------------------------
-
-
-def _build_model_table(schema_madlib, output_table, final_state, n_iterations):
+ # We use previous state because the last iteration
+ # just calculates loss
+ if loss < best_loss:
+ best_state = prev_state
+ best_loss = loss
+ current_iteration = 1
+ prev_state = None
+ _build_model_table(schema_madlib, output_table, best_state,
+ best_loss, n_iterations)
+ layer_sizes_str = py_list_to_sql_string(
+ layer_sizes, array_type="integer")
+ classes_str = py_list_to_sql_string(
+ [strip_end_quotes(cl, "'") for cl in classes],
+ array_type=dependent_type)
+ summary_table_creation_query = """
+ CREATE TABLE {summary_table}(
+ source_table TEXT,
+ independent_varname TEXT,
+ dependent_varname TEXT,
+ tolerance FLOAT,
+ learning_rate_init FLOAT,
+ learning_rate_policy TEXT,
+ n_iterations INTEGER,
+ n_tries INTEGER,
+ layer_sizes INTEGER[],
+ activation TEXT,
+ is_classification BOOLEAN,
+ classes {dependent_type}[],
+ weights VARCHAR,
+ x_means DOUBLE PRECISION[],
+ x_stds DOUBLE PRECISION[]
+ )""".format(summary_table=summary_table,
+ dependent_type=dependent_type)
+
+ summary_table_update_query = """
+ INSERT INTO {summary_table} VALUES(
+ '{source_table}',
+ '{independent_varname}',
+ '{original_dependent_varname}',
+ {tolerance},
+ {step_size_init},
+ '{learning_rate_policy}',
+ {n_iterations},
+ {n_tries},
+ {layer_sizes_str},
+ '{activation}',
+ {is_classification},
+ {classes_str},
+ '{weights}',
+ {x_means},
+ {x_stds}
+ )
+ """.format(**locals())
+ plpy.execute(summary_table_creation_query)
+ plpy.execute(summary_table_update_query)
+ return None
+
+
+def _get_loss(schema_madlib, state):
+ return plpy.execute("""
+ SELECT
+ (result).loss AS loss
+ FROM (
+ SELECT
+ {schema_madlib}.internal_mlp_igd_result(
+ {final_state_str}
+ ) AS result
+ ) rel_state_subq
+ """.format(
+ schema_madlib=schema_madlib,
+ final_state_str=py_list_to_sql_string(state)))[0]["loss"]
+
+
+def _build_model_table(schema_madlib, output_table, final_state, loss, n_iterations):
final_state_str = py_list_to_sql_string(
final_state, array_type="double precision")
model_table_query = """
- CREATE TABLE {output_table} AS
+ CREATE TABLE {output_table} AS
+ SELECT
+ (result).coeff as coeff,
+ {loss} as loss,
+ {n_iterations} as num_iterations
+ FROM (
SELECT
- (result).coeff AS coeff,
- (result).loss AS loss,
- {n_iterations} AS num_iterations
- -- (result).num_rows_processed AS num_rows_processed,
- -- n_tuples_including_nulls - (result).num_rows_processed
- FROM (
- SELECT
- {schema_madlib}.internal_mlp_igd_result(
- {final_state_str}
- ) AS result
- ) rel_state_subq
- """.format(**locals())
+ {schema_madlib}.internal_mlp_igd_result(
+ {final_state_str}
+ ) AS result
+ ) rel_state_subq
+ """.format(**locals())
plpy.execute(model_table_query)
-# ----------------------------------------------------------------------
def _get_optimizer_params(param_str):
params_defaults = {
- "step_size": (0.001, float),
+ "learning_rate_init": (0.001, float),
"n_iterations": (100, int),
"n_tries": (1, int),
"tolerance": (0.001, float),
+ "learning_rate_policy": ("constant", str),
+ "gamma": (0.1, float),
+ "iterations_per_step": (100, int),
+ "power": (0.5, float),
+ "lambda": (0, float)
}
param_defaults = dict([(k, v[0]) for k, v in params_defaults.items()])
param_types = dict([(k, v[1]) for k, v in params_defaults.items()])
@@ -228,10 +346,9 @@ def _get_optimizer_params(param_str):
if not param_str:
return param_defaults
- name_value = extract_keyvalue_params(param_str, param_types, param_defaults,
- ignore_invalid=False)
+ name_value = extract_keyvalue_params(
+ param_str, param_types, param_defaults, ignore_invalid=False)
return name_value
-# ----------------------------------------------------------------------
def _validate_args_classification(source_table, dependent_varname):
@@ -239,89 +356,174 @@ def _validate_args_classification(source_table, dependent_varname):
int_types = ['integer', 'smallint', 'bigint']
text_types = ['text', 'varchar', 'character varying', 'char', 'character']
boolean_types = ['boolean']
- _assert("[]" in expr_type or expr_type in int_types + text_types + boolean_types,
+ _assert("[]" in expr_type
+ or expr_type in int_types + text_types + boolean_types,
"Dependent variable column should refer to an "
"integer, boolean, text, varchar, or character type.")
-# ----------------------------------------------------------------------
def _validate_args_regression(source_table, dependent_varname):
expr_type = get_expr_type(dependent_varname, source_table)
int_types = ['integer', 'smallint', 'bigint']
float_types = ['double precision', 'real']
- _assert("[]" in expr_type or expr_type in int_types + float_types,
- "Dependent variable column should refer to an array or numeric type")
+ _assert(
+ "[]" in expr_type or expr_type in int_types + float_types,
+ "Dependent variable column should refer to an array or numeric type")
if "[]" in expr_type:
- _assert(array_col_has_same_dimension(source_table, dependent_varname),
- "Dependent variable column should refer to arrays of the same length")
-# ----------------------------------------------------------------------
+ _assert(
+ array_col_has_same_dimension(source_table, dependent_varname),
+ "Dependent variable column should refer to arrays of the same length"
+ )
+
+
+def _validate_summary_table(summary_table):
+ input_tbl_valid(summary_table, 'MLP')
+ cols_in_tbl_valid(summary_table, [
+ 'dependent_varname', 'independent_varname', 'activation',
+ 'tolerance', 'learning_rate_init', 'n_iterations', 'n_tries',
+ 'classes', 'layer_sizes', 'source_table', 'x_means', 'x_stds'
+ ], 'MLP')
+
+
+def _validate_warm_start(source_table, output_table, summary_table, independent_varname,
+ dependent_varname, layer_sizes,
+ optimizer_params, is_classification, weights,
+ warm_start, activation):
+ _assert(table_exists(output_table),
+ "MLP error: Warm start failed due to missing model table: " + output_table)
+ _assert(table_exists(summary_table),
+ "MLP error: Warm start failed due to missing summary table: " + summary_table)
+
+ _assert(optimizer_params["n_tries"] == 1,
+ "MLP error: warm_start is only compatible for n_tries = 1")
+
+ summary = plpy.execute("SELECT * FROM {0}".format(summary_table))[0]
+ params = [
+ "independent_varname", "dependent_varname", "layer_sizes",
+ "is_classification", "weights", "activation"
+ ]
+ for param in params:
+ _assert_equal(eval(param), summary[param],
+ "MLP error: warm start failed due to different parameter value: " +
+ param)
+ output = plpy.execute("SELECT * FROM {0}".format(output_table))[0]
+ coeff = output['coeff']
+ num_coeffs = sum(
+ map(lambda i: (layer_sizes[i] + 1) * (layer_sizes[i + 1]),
+ range(len(layer_sizes) - 1)))
+ _assert_equal(num_coeffs,
+ len(coeff),
+ "MLP error: Warm start failed to invalid output_table: " +
+ output_table + ". Invalid number of coefficients in model.")
+ x_means = py_list_to_sql_string(
+ summary["x_means"], array_type="DOUBLE PRECISION")
+ x_stds = py_list_to_sql_string(
+ summary["x_stds"], array_type="DOUBLE PRECISION")
+
+ return coeff, x_means, x_stds
def _validate_args(source_table, output_table, summary_table, independent_varname,
dependent_varname, hidden_layer_sizes,
- optimizer_params, is_classification):
+ optimizer_params, is_classification, weights, warm_start, activation):
input_tbl_valid(source_table, "MLP")
- output_tbl_valid(output_table, "MLP")
- output_tbl_valid(summary_table, "MLP")
- _assert(is_var_valid(source_table, independent_varname),
- "MLP error: invalid independent_varname "
- "('{independent_varname}') for source_table "
- "({source_table})!".format(independent_varname=independent_varname,
- source_table=source_table))
-
- _assert(is_var_valid(source_table, dependent_varname),
- "MLP error: invalid dependent_varname "
- "('{dependent_varname}') for source_table "
- "({source_table})!".format(dependent_varname=dependent_varname,
- source_table=source_table))
- _assert(hidden_layer_sizes is not None,
- "hidden_layer_sizes may not be null")
- _assert(isinstance(hidden_layer_sizes, list),
- "hidden_layer_sizes must be an array of integers")
- _assert(all(isinstance(value, int) for value in hidden_layer_sizes),
- "MLP error: Hidden layers sizes must be integers")
- _assert(all(value >= 0 for value in hidden_layer_sizes),
- "MLP error: Hidden layers sizes must be greater than 0.")
+ if not warm_start:
+ output_tbl_valid(output_table, "MLP")
+ output_tbl_valid(summary_table, "MLP")
+
+ _assert(
+ is_var_valid(source_table, independent_varname),
+ "MLP error: invalid independent_varname "
+ "('{independent_varname}') for source_table "
+ "({source_table})!".format(
+ independent_varname=independent_varname,
+ source_table=source_table))
+
+ _assert(
+ is_var_valid(source_table, dependent_varname),
+ "MLP error: invalid dependent_varname "
+ "('{dependent_varname}') for source_table "
+ "({source_table})!".format(
+ dependent_varname=dependent_varname, source_table=source_table))
+ _assert(
+ isinstance(hidden_layer_sizes, list),
+ "hidden_layer_sizes must be an array of integers")
+ # TODO put this check earlier
+ _assert(
+ all(isinstance(value, int) for value in hidden_layer_sizes),
+ "MLP error: Hidden layers sizes must be integers")
+ _assert(
+ all(value >= 0 for value in hidden_layer_sizes),
+ "MLP error: Hidden layers sizes must be greater than 0.")
+ _assert(optimizer_params["lambda"] >= 0,
+ "MLP error: lambda should be greater than or equal to 0.")
_assert(optimizer_params["tolerance"] >= 0,
- "MLP error: Tolerance should be greater than or equal to 0.")
+ "MLP error: tolerance should be greater than or equal to 0.")
_assert(optimizer_params["n_tries"] >= 1,
- "MLP error: Number of tries should be greater than or equal to 1")
- _assert(optimizer_params["n_iterations"] >= 1,
- "MLP error: Number of iterations should be greater than or equal to 1")
- _assert(optimizer_params["step_size"] > 0,
- "MLP error: Stepsize should be greater than 0.")
+ "MLP error: n_tries should be greater than or equal to 1")
+ _assert(
+ optimizer_params["n_iterations"] >= 1,
+ "MLP error: n_iterations should be greater than or equal to 1")
+ _assert(optimizer_params["power"] > 0,
+ "MLP error: power should be greater than 0.")
+ _assert(0 < optimizer_params["gamma"] <= 1,
+ "MLP error: gamma should be between 0 and 1.")
+ _assert(optimizer_params["iterations_per_step"] > 0,
+ "MLP error: iterations_per_step should be greater than 0.")
+ _assert(optimizer_params["learning_rate_init"] > 0,
+ "MLP error: learning_rate_init should be greater than 0.")
_assert("[]" in get_expr_type(independent_varname, source_table),
"Independent variable column should refer to an array")
- _assert(array_col_has_same_dimension(source_table, independent_varname),
- "Independent variable column should refer to arrays of the same length")
+ _assert(
+ array_col_has_same_dimension(source_table, independent_varname),
+ "Independent variable column should refer to arrays of the same length"
+ )
+
+ int_types = ['integer', 'smallint', 'bigint']
+ float_types = ['double precision', 'real']
+ _assert(
+ get_expr_type(weights, source_table) in int_types + float_types,
+ "MLP error: Weights should be a numeric type")
if is_classification:
_validate_args_classification(source_table, dependent_varname)
else:
_validate_args_regression(source_table, dependent_varname)
-# ----------------------------------------------------------------------
-def _get_activation_function_name(activation_function):
- if not activation_function:
- activation_function = 'sigmoid'
+def _get_learning_rate_policy_name(learning_rate_policy):
+ if not learning_rate_policy:
+ learning_rate_policy = 'constant'
+ else:
+ supported_learning_rate_policies = ['constant', 'exp', 'inv', 'step']
+ try:
+ learning_rate_policy = next(
+ x for x in supported_learning_rate_policies
+ if x.startswith(learning_rate_policy))
+ except StopIteration:
+ plpy.error(
+ "MLP Error: Invalid learning rate policy: "
+ "{0}. Supported learning rate policies are ({1})".format(
+ learning_rate_policy,
+ ','.join(sorted(supported_learning_rate_policies))))
+ return learning_rate_policy
+
+
+def _get_activation_function_name(activation):
+ if not activation:
+ activation = 'sigmoid'
else:
- # Add non-linear kernels below after implementing them.
supported_activation_function = ['sigmoid', 'tanh', 'relu']
try:
- # allow user to specify a prefix substring of
- # supported kernels. This works because the supported
- # kernels have unique prefixes.
- activation_function = next(x for x in supported_activation_function
- if x.startswith(activation_function))
+ activation = next(
+ x for x in supported_activation_function
+ if x.startswith(activation))
except StopIteration:
- # next() returns a StopIteration if no element found
plpy.error("MLP Error: Invalid activation function: "
- "{0}. Supported activation functions are ({1})"
- .format(activation_function, ','.join(
- sorted(supported_activation_function))))
- return activation_function
-# ------------------------------------------------------------------------------
+ "{0}. Supported activation functions are ({1})".format(
+ activation,
+ ','.join(sorted(supported_activation_function))))
+ return activation
def _get_activation_index(activation_name):
@@ -333,12 +535,15 @@ def _format_label(label):
if isinstance(label, str):
return "'" + label + "'"
return label
-# -------------------------------------------------------------------------
-def mlp_predict(schema_madlib, model_table, data_table,
- id_col_name, output_table,
- pred_type='response', **kwargs):
+def mlp_predict(schema_madlib,
+ model_table,
+ data_table,
+ id_col_name,
+ output_table,
+ pred_type='response',
+ **kwargs):
""" Score new observations using a trained neural network
@param schema_madlib Name of the schema where MADlib is installed
@@ -356,13 +561,7 @@ def mlp_predict(schema_madlib, model_table, data_table,
input_tbl_valid(model_table, 'MLP')
cols_in_tbl_valid(model_table, ['coeff'], 'MLP')
summary_table = add_postfix(model_table, "_summary")
- input_tbl_valid(summary_table, 'MLP')
- cols_in_tbl_valid(summary_table,
- ['dependent_varname', 'independent_varname',
- 'activation_function',
- 'tolerance', 'step_size', 'n_iterations',
- 'n_tries', 'classes', 'layer_sizes', 'source_table'],
- 'MLP')
+ _validate_summary_table(summary_table)
summary = plpy.execute("SELECT * FROM {0}".format(summary_table))[0]
coeff = py_list_to_sql_string(plpy.execute(
@@ -370,106 +569,116 @@ def mlp_predict(schema_madlib, model_table, data_table,
dependent_varname = summary['dependent_varname']
independent_varname = summary['independent_varname']
source_table = summary['source_table']
- activation_function = _get_activation_index(summary['activation_function'])
+ activation = _get_activation_index(summary['activation'])
layer_sizes = py_list_to_sql_string(
summary['layer_sizes'], array_type="DOUBLE PRECISION")
is_classification = int(summary["is_classification"])
is_response = int(pred_type == 'response')
+ x_means = py_list_to_sql_string(
+ summary["x_means"], array_type="DOUBLE PRECISION")
+ x_stds = py_list_to_sql_string(
+ summary["x_stds"], array_type="DOUBLE PRECISION")
- pred_name = ('"prob_{0}"' if pred_type == "prob" else
- '"estimated_{0}"').format(dependent_varname.replace('"', '').strip())
+ pred_name = (
+ '"prob_{0}"' if pred_type == "prob" else
+ '"estimated_{0}"').format(dependent_varname.replace('"', '').strip())
input_tbl_valid(data_table, 'MLP')
- _assert(is_var_valid(data_table, independent_varname),
- "MLP Error: independent_varname ('{0}') is invalid for data_table ({1})".
- format(independent_varname, data_table))
+ _assert(
+ is_var_valid(data_table, independent_varname),
+ "MLP Error: independent_varname ('{0}') is invalid for data_table ({1})".
+ format(independent_varname, data_table))
_assert(id_col_name is not None, "MLP Error: id_col_name is NULL")
- _assert(is_var_valid(data_table, id_col_name),
- "MLP Error: id_col_name ('{0}') is invalid for {1}".
- format(id_col_name, data_table))
+ _assert(
+ is_var_valid(data_table, id_col_name),
+ "MLP Error: id_col_name ('{0}') is invalid for {1}".format(
+ id_col_name, data_table))
output_tbl_valid(output_table, 'MLP')
- with MinWarning("warning"):
- header = "CREATE TABLE " + output_table + " AS "
- # Regression
- if not is_classification:
- dependent_type = get_expr_type(dependent_varname, source_table)
- unnest_if_not_array = ""
- # Return the same type as the user provided. Internally we always use an array, but
- # if they provided a scaler, unnest it for the user
- if "[]" not in dependent_type:
- unnest_if_not_array = "UNNEST"
+ header = "CREATE TABLE " + output_table + " AS "
+ # Regression
+ if not is_classification:
+ dependent_type = get_expr_type(dependent_varname, source_table)
+ unnest_if_not_array = ""
+ # Return the same type as the user provided. Internally we always
+ # use an array, but if they provided a scalar, unnest it for
+ # the user
+ if "[]" not in dependent_type:
+ unnest_if_not_array = "UNNEST"
+ sql = header + """
+ SELECT {id_col_name},
+ {unnest_if_not_array}({schema_madlib}.internal_predict_mlp(
+ {coeff},
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )) as {pred_name}
+ FROM {data_table}
+ """
+ else:
+ summary_query = """
+ SELECT classes FROM {0}
+ """.format(summary_table)
+ classes = plpy.execute(summary_query)[0]['classes']
+ if pred_type == "response":
+ classes_with_index_table = unique_string()
+ classes_table = unique_string()
sql = header + """
- SELECT {id_col_name},
- {unnest_if_not_array}({schema_madlib}.internal_predict_mlp(
- {coeff},
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )) as {pred_name}
- FROM {data_table}
+ SELECT
+ q.{id_col_name}
+ ,(ARRAY{classes})[pred_idx[1]+1] as {pred_name}
+ FROM (
+ SELECT
+ {id_col_name},
+ {schema_madlib}.internal_predict_mlp(
+ {coeff}::DOUBLE PRECISION[],
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )
+ as pred_idx
+ FROM {data_table}
+ ) q
"""
else:
- summary_query = """
- SELECT classes FROM {0}
- """.format(summary_table)
- classes = plpy.execute(summary_query)[0]['classes']
- if pred_type == "response":
- # This join is to recover the class name from the summary table,
- # as prediction just returns an index
- classes_with_index_table = unique_string()
- classes_table = unique_string()
- sql = header + """
- SELECT
- q.{id_col_name}
- ,(ARRAY{classes})[pred_idx[1]+1] as {pred_name}
- FROM (
- SELECT
- {id_col_name},
- {schema_madlib}.internal_predict_mlp(
- {coeff}::DOUBLE PRECISION[],
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )
- as pred_idx
- FROM {data_table}
- ) q
- """
- else:
- # Incomplete
- intermediate_col = unique_string()
- score_format = ',\n'.join([
- 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
- format(j=i + 1, c_str=str(c).strip(' "'),
- interim=intermediate_col)
- for i, c in enumerate(classes)])
- sql = header + """
- SELECT
- {id_col_name},
- {score_format}
- FROM (
- SELECT {id_col_name},
- {schema_madlib}.internal_predict_mlp(
- {coeff}::DOUBLE PRECISION[],
- {independent_varname}::DOUBLE PRECISION[],
- {is_classification},
- {activation_function},
- {layer_sizes},
- {is_response}
- )::TEXT[]
- AS {intermediate_col}
- FROM {data_table}
- ) q
- """
+ # Incomplete
+ intermediate_col = unique_string()
+ score_format = ',\n'.join([
+ 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
+ format(j=i + 1, c_str=str(c).strip(' "'),
+ interim=intermediate_col)
+ for i, c in enumerate(classes)])
+ sql = header + """
+ SELECT
+ {id_col_name},
+ {score_format}
+ FROM (
+ SELECT {id_col_name},
+ {schema_madlib}.internal_predict_mlp(
+ {coeff}::DOUBLE PRECISION[],
+ {independent_varname}::DOUBLE PRECISION[],
+ {is_classification},
+ {activation},
+ {layer_sizes},
+ {is_response},
+ {x_means},
+ {x_stds}
+ )::TEXT[]
+ AS {intermediate_col}
+ FROM {data_table}
+ ) q
+ """
sql = sql.format(**locals())
plpy.execute(sql)
-# ----------------------------------------------------------------------
def mlp_help(schema_madlib, message, is_classification):
@@ -511,34 +720,44 @@ def mlp_help(schema_madlib, message, is_classification):
USAGE
---------------------------------------------------------------------------
SELECT {schema_madlib}.{method}(
- source_table, -- name of input table
- output_table, -- name of output model table
- independent_varname, -- name of independent variable
- dependent_varname, -- {label_description}
- hidden_layer_sizes, -- Array of integers indicating the
+ source_table, -- TEXT. name of input table
+ output_table, -- TEXT. name of output model table
+ independent_varname, -- TEXT. name of independent variable
+ dependent_varname, -- TEXT. {label_description}
+ hidden_layer_sizes, -- INTEGER[]. Array of integers indicating the
number of hidden units per layer.
Length equal to the number of hidden layers.
- optimizer_params, -- optional, default NULL
+ optimizer_params, -- TEXT. optional, default NULL
parameters for optimization in
a comma-separated string of key-value pairs.
+ To find out more:
+
+ SELECT {schema_madlib}.{method}('optimizer_params')
- step_size DOUBLE PRECISION, -- Default: 0.001
- Learning rate
- n_iterations INTEGER, -- Default: 100
- Number of iterations per try
- n_tries INTEGER, -- Default: 1
- Total number of training cycles,
- with random initializations to avoid
- local minima.
- tolerance DOUBLE PRECISION, -- Default: 0.001
- If the distance in loss between
- two iterations is less than the
- tolerance training will stop, even if
- n_iterations has not been reached
-
- activation -- optional, default: 'sigmoid'.
+ activation -- TEXT. optional, default: 'sigmoid'.
supported activations: 'relu', 'sigmoid',
and 'tanh'
+
+ weights -- TEXT. optional, default: NULL.
+ Weights for input rows. Column name which
+ specifies the weight for each input row.
+ This weight will be incorporated into the
+ update during SGD, and will not be used
+ for loss calculations. If not specified,
+ weight for each row will default to 1.
+ Column should be a numeric type.
+
+ warm_start -- BOOLEAN. optional, default: FALSE.
+ Initialize weights with the coefficients from
+ the last call. If true, weights will
+ be initialized from output_table. Note that
+ all parameters other than optimizer_params,
+ and verbose must remain constant between calls
+ to warm_start.
+
+ verbose -- BOOLEAN. optional, default: FALSE
+ Provides verbose output of the results of
+ training.
);
@@ -576,22 +795,29 @@ def mlp_help(schema_madlib, message, is_classification):
{1,0.09378,12.50,7.870,0,0.5240,5.8890,39.00,5.4509,5,311.0,15.20,390.50,15.71} | 1 | 21.70
\.
- - Generate a multilayer perception with a two hidden layers of 5 units
+ - Generate a multilayer perceptron with two hidden layers of 25 units
each. Use the x column as the independent variables, and use the class
- column as the classification. Set the tolerance to 0 so that 300
+ column as the classification. Set the tolerance to 0 so that 500
iterations will be run. Use a relu activation function.
The model will be written to mlp_regress.
- SELECT mlp_regression(
- 'lin_housing_wi', -- Source table
- 'mlp_regress_result', -- Desination table
- 'x', -- Independent variable
- 'y', -- Dependent variable
- ARRAY[5,5], -- Number of hidden units per layer
- 'step_size=0.007,
- n_iterations=300,
+ DROP TABLE IF EXISTS mlp_regress;
+ DROP TABLE IF EXISTS mlp_regress_summary;
+ SELECT madlib.mlp_regression(
+ 'lin_housing', -- Source table
+ 'mlp_regress', -- Destination table
+ 'x', -- Input features
+ 'y', -- Dependent variable
+ ARRAY[25,25], -- Number of units per layer
+ 'learning_rate_init=0.001,
+ n_iterations=500,
+ lambda=0.001,
tolerance=0',
- 'sigmoid'); -- Activation
+ 'relu',
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+ );
"""
@@ -630,29 +856,78 @@ def mlp_help(schema_madlib, message, is_classification):
-- Generate a multilayer perceptron with a single hidden layer of 5 units.
Use the attributes column as the independent variables, and use the class
- column as the classification. Set the tolerance to 0 so that 1000
+ column as the classification. Set the tolerance to 0 so that 500
iterations will be run. Use a hyperbolic tangent activation function.
- The model will be written to mlp_result.
+ The model will be written to mlp_model.
- SELECT {schema_madlib}.mlp_classification(
+ DROP TABLE IF EXISTS mlp_model;
+ DROP TABLE IF EXISTS mlp_model_summary;
+ SELECT madlib.mlp_classification(
'iris_data', -- Source table
'mlp_model', -- Destination table
'attributes', -- Input features
'class_text', -- Label
ARRAY[5], -- Number of units per layer
- 'step_size=0.003,
- n_iterations=5000,
+ 'learning_rate_init=0.003,
+ n_iterations=500,
tolerance=0', -- Optimizer params
- 'tanh'); -- Activation function
+ 'tanh', -- Activation function
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+ );
+
+ SELECT * FROM mlp_model;
""".format(**args)
example = classification_example if is_classification else regression_example
+ optimizer_params = """
+ ------------------------------------------------------------------------------------------------
+ OPTIMIZER PARAMS
+ ------------------------------------------------------------------------------------------------
+ learning_rate_init DOUBLE PRECISION, -- Default: 0.001
+ Initial learning rate
+ learning_rate_policy VARCHAR, -- Default: 'constant'
+ One of 'constant','exp','inv','step'
+ 'constant': learning_rate =
+ learning_rate_init
+ 'exp': learning_rate =
+ learning_rate_init * gamma^(iter)
+ 'inv': learning_rate =
+ learning_rate_init * (iter+1)^(-power)
+ 'step': learning_rate =
+ learning_rate_init * gamma^(floor(iter/iterations_per_step))
+ Where iter is the current iteration of SGD.
+ gamma DOUBLE PRECISION, -- Default: '0.1'
+ Decay rate for learning rate.
+ Valid for learning_rate_policy = 'exp', or 'step'
+ power DOUBLE PRECISION, -- Default: '0.5'
+ Exponent for learning_rate_policy = 'inv'
+ iterations_per_step INTEGER, -- Default: '100'
+ Number of iterations to run before decreasing the learning
+ rate by a factor of gamma. Valid for learning rate
+ policy = 'step'
+ n_iterations INTEGER, -- Default: 100
+ Number of iterations per try
+ n_tries INTEGER, -- Default: 1
+ Total number of training cycles,
+ with random initializations to avoid
+ local minima.
+ tolerance DOUBLE PRECISION, -- Default: 0.001
+ If the distance in loss between
+ two iterations is less than the
+ tolerance training will stop, even if
+ n_iterations has not been reached.
+ """.format(**args)
+
if not message:
return summary
elif message.lower() in ('usage', 'help', '?'):
return usage
elif message.lower() == 'example':
return example
+ elif message.lower() == 'optimizer_params':
+ return optimizer_params
return """
No such option. Use "SELECT {schema_madlib}.{method}()" for help.
""".format(**args)
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/ports/postgres/modules/convex/test/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in
index 97541a9..2302252 100644
--- a/src/ports/postgres/modules/convex/test/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/test/mlp.sql_in
@@ -28,7 +28,7 @@
-- Classification
-SELECT setseed(0.5);
+SELECT setseed(0.6);
DROP TABLE IF EXISTS iris_data, iris_test, mlp_class, mlp_class_summary CASCADE;
CREATE TABLE iris_data(
id integer,
@@ -191,21 +191,27 @@ INSERT INTO iris_data VALUES
SELECT mlp_classification(
- 'iris_data', -- Source table
+ 'iris_data', -- Source table
'mlp_class', -- Destination table
- 'attributes', -- Input features
- 'class', -- Label
- ARRAY[5], -- Number of units per layer
- 'step_size=0.001,
- n_iterations=1000,
+ 'attributes', -- Input features
+ 'class', -- Label
+ ARRAY[5], -- Number of units per layer
+ 'learning_rate_init=0.1,
+ learning_rate_policy=constant,
+ n_iterations=800,
+ n_tries=2,
tolerance=0',
- 'tanh');
+ 'sigmoid',
+ '',
+ FALSE,
+ TRUE
+);
SELECT assert(
-- Loss will improve much more if more iterations are run
- loss < 30,
- 'MLP: Loss is too high (> 30). Wrong result.'
+ loss < 0.1,
+ 'MLP: Loss is too high (> 0.1). Wrong result.'
) FROM mlp_class;
DROP TABLE IF EXISTS mlp_prediction;
@@ -239,9 +245,8 @@ SELECT mlp_predict(
'mlp_prediction',
'response');
-select * from mlp_prediction;
+SELECT * FROM mlp_prediction;
SELECT assert(
- -- Accuracy greater than 90%
COUNT(*)/150.0 > 0.95,
'MLP: Accuracy is too low (< 95%). Wrong result.'
) FROM
@@ -766,65 +771,30 @@ COPY lin_housing_wi (x, grp_by_col, y) FROM STDIN NULL '?' DELIMITER '|';
{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88} | 2 | 11.90
\.
--- Normalize the columns
-CREATE TEMPORARY TABLE maxs as(
- SELECT
- max(x[1]) m1,
- max(x[2]) m2,
- max(x[3]) m3,
- max(x[4]) m4,
- max(x[5]) m5,
- max(x[6]) m6,
- max(x[7]) m7,
- max(x[8]) m8,
- max(x[9]) m9,
- max(x[10]) m10,
- max(x[11]) m11,
- max(x[12]) m12,
- max(x[13]) m13,
- max(x[14]) m14
- from lin_housing_wi
-);
-CREATE TABLE lin_housing_wi_scaled AS
-SELECT ARRAY[
- x[1]/(SELECT m1 from maxs),
- x[2]/(SELECT m2 from maxs),
- x[3]/(SELECT m3 from maxs),
- x[4]/(SELECT m4 from maxs),
- x[5]/(SELECT m5 from maxs),
- x[6]/(SELECT m6 from maxs),
- x[7]/(SELECT m7 from maxs),
- x[8]/(SELECT m8 from maxs),
- x[9]/(SELECT m9 from maxs),
- x[10]/(SELECT m10 from maxs),
- x[11]/(SELECT m11 from maxs),
- x[12]/(SELECT m12 from maxs),
- x[13]/(SELECT m13 from maxs),
- x[14]/(SELECT m14 from maxs)] as x,
- id,y
-FROM lin_housing_wi;
-
-DROP TABLE IF EXISTS maxs;
DROP TABLE IF EXISTS mlp_regress;
DROP TABLE IF EXISTS mlp_regress_summary;
SELECT setseed(0);
SELECT mlp_regression(
- 'lin_housing_wi_scaled', -- Source table
+ 'lin_housing_wi', -- Source table
'mlp_regress', -- Destination table
'x', -- Input features
'y', -- Dependent variable
- ARRAY[5,5], -- Number of units per layer
- 'step_size=0.005,
- n_iterations=800,
+ ARRAY[40], -- Number of units per layer
+ 'learning_rate_init=0.015,
+ learning_rate_policy=inv,
+ n_iterations=300,
tolerance=0',
- 'sigmoid');
+ 'sigmoid',
+ '',
+ False,
+ TRUE);
SELECT assert(
- loss < 10,
+ loss < 2,
'MLP: Loss is too high (> 2). Wrong result.'
) FROM mlp_regress;
@@ -832,14 +802,14 @@ SELECT assert(
DROP TABLE IF EXISTS mlp_prediction_regress;
SELECT mlp_predict(
'mlp_regress',
- 'lin_housing_wi_scaled',
+ 'lin_housing_wi',
'id',
'mlp_prediction_regress',
'output');
SELECT assert(
- 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi_scaled.y,2.0))/506 < 10.0,
+ 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi.y,2.0))/506 < 2.0,
'MLP: Predict MSE is too high (> 2). Wrong result'
)
-FROM mlp_prediction_regress JOIN lin_housing_wi_scaled
-ON mlp_prediction_regress.id = lin_housing_wi_scaled.id;
-DROP TABLE IF EXISTS lin_housing_wi_scaled;
+FROM mlp_prediction_regress JOIN lin_housing_wi
+ON mlp_prediction_regress.id = lin_housing_wi.id;
+DROP TABLE IF EXISTS lin_housing_wi;
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/ports/postgres/modules/utilities/utilities.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index b28a5f3..c1670b5 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -54,6 +54,18 @@ def is_orca():
# ------------------------------------------------------------------------------
+def _assert_equal(o1, o2, msg):
+ """
+ @brief if the given objects are not equal, then raise an error with the message
+ @param o1 the first object
+ @param o2 the second object
+ @param msg the error message to be reported
+ """
+ if not o1 == o2:
+ plpy.error(msg)
+# ------------------------------------------------------------------------------
+
+
def _assert(condition, msg):
"""
@brief if the given condition is false, then raise an error with the message
[2/2] incubator-madlib git commit: MLP: Add multiple enhancements
Posted by ri...@apache.org.
MLP: Add multiple enhancements
JIRA: MADLIB-1134
This commit adds following:
- Weights: Each tuple in training data can be individually weighted
- Warm start: Network weights can be initialized from the output of a
previous call.
- n_tries: Allows calling the train function multiple times to avoid
local minima.
- Learning rate policy: Allows user to specify a policy to decay the
learning rate.
- Standardization: Inputs are standardized to zero mean and unit std.
deviation.
Closes #162
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/ff1b0f88
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/ff1b0f88
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/ff1b0f88
Branch: refs/heads/master
Commit: ff1b0f883c7a178323670b83b14069e06bf1b808
Parents: 6f6f804
Author: Rahul Iyer <ri...@apache.org>
Authored: Mon Aug 14 09:50:25 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Mon Aug 14 09:50:25 2017 -0700
----------------------------------------------------------------------
.gitignore | 1 +
doc/design/modules/neural-network.tex | 144 ++-
doc/literature.bib | 8 +-
doc/mainpage.dox.in | 3 +-
src/modules/convex/mlp_igd.cpp | 74 +-
src/modules/convex/task/l2.hpp | 3 +-
src/modules/convex/task/mlp.hpp | 259 ++----
src/modules/convex/type/model.hpp | 70 +-
src/modules/convex/type/state.hpp | 30 +-
src/modules/convex/type/tuple.hpp | 2 +-
src/ports/postgres/modules/convex/mlp.sql_in | 497 +++++++---
src/ports/postgres/modules/convex/mlp_igd.py_in | 923 ++++++++++++-------
.../postgres/modules/convex/test/mlp.sql_in | 94 +-
.../postgres/modules/utilities/utilities.py_in | 12 +
14 files changed, 1297 insertions(+), 823 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index abfccfa..00dc016 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
# Ignore build directory
/build*
+/build-docker*
# Ignore generated code files
*.so
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/doc/design/modules/neural-network.tex
----------------------------------------------------------------------
diff --git a/doc/design/modules/neural-network.tex b/doc/design/modules/neural-network.tex
index 8802361..9f8110b 100644
--- a/doc/design/modules/neural-network.tex
+++ b/doc/design/modules/neural-network.tex
@@ -22,7 +22,7 @@
\chapter{Neural Network}
\begin{moduleinfo}
-\item[Authors] {Xixuan Feng}
+\item[Authors] {Xixuan Feng, Cooper Sloan}
\end{moduleinfo}
% Abstract. What is the problem we want to solve?
@@ -30,7 +30,8 @@ This module implements artificial neural network \cite{ann_wiki}.
\section{Multilayer Perceptron}
Multilayer perceptron is arguably the most popular model among many neural network models \cite{mlp_wiki}.
-Here, we learn the coefficients by minimizing a least square objective function (\cite{bertsekas1999nonlinear}, example 1.5.3).
+Here, we learn the coefficients by minimizing a least square objective function, or cross entropy (\cite{bertsekas1999nonlinear}, example 1.5.3).
+The parallel architecture is based on the paper by Zhiheng Huang \cite{mlp_parallel}.
% Background. Why can we solve the problem with gradient-based methods?
\subsection{Solving as a Convex Program}
@@ -46,41 +47,47 @@ For multilayer perceptron, we choose incremental gradient descent (IGD).
In the remaining part of this section, we will give a formal description of the derivation of objective function and its gradient.
\paragraph{Objective function.}
-We mostly follow the notations in example 1.5.3 from Bertsekas \cite{bertsekas1999nonlinear}, for a multilayer perceptron that has $N$ layers (stages), and the $k$th stage has $n_k$ activation units ($\phi : \mathbb{R} \to \mathbb{R}$), the objective function is given as
-\[f_{(y, z)}(u) = \frac{1}{2} \|h(u, y) - z\|_2^2,\]
-where $y \in \mathbb{R}^{n_0}$ is the input vector, $z \in \mathbb{R}^{n_N}$ is the output vector,
-\footnote{Of course, the objective function can be defined over a set of input-output vector pairs, which is simply given as the addition of the above $f$.}
+We mostly follow the notations in example 1.5.3 from Bertsekas \cite{bertsekas1999nonlinear}, for a multilayer perceptron that has $N$ layers (stages), and the $k^{th}$ stage has $n_k$ activation units ($\phi : \mathbb{R} \to \mathbb{R}$), the objective function for regression is given as
+\[f_{(x, y)}(u) = \frac{1}{2} \|h(u, x) - y\|_2^2,\]
+and for classification the objective function is given as
+\[f_{(x, y)}(u) = -\sum_i \left( \log(h_i(u, x)) \cdot y_i + \log(1 - h_i(u, x)) \cdot (1 - y_i) \right),\]
+where $x \in \mathbb{R}^{n_0}$ is the input vector, $y \in \mathbb{R}^{n_N}$ is the output vector (one hot encoded for classification),~\footnote{Of course, the objective function can be defined over a set of input-output vector pairs, which is simply given as the addition of the above $f$.}
and the coefficients are given as
-\[u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\}\]
+\[u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\},\]
+and are initialized from a uniform distribution as follows:
+\[u_{k}^{sj} = uniform(-r,r),\]
+where r is defined as follows:
+\[r = \sqrt{\frac{6}{n_k+n_{k+1}}}\]
+With regularization, an additional term enters the objective function, given as
+\[\sum_{u_k^{sj}} \frac{1}{2} \lambda \left(u_k^{sj}\right)^2 \]
This still leaves $h : \mathbb{R}^{n_0} \to \mathbb{R}^{n_N}$ as an open item.
-Let $x_k \in \mathbb{R}^{n_k}, k = 1,...,N$ be the output vector of the $k$th layer. Then we define $h(u, y) = x_N$, based on setting $x_0 = y$ and the $j$th component of $x_k$ is given in an iterative fashion as
-\footnote{$x_k^0 \equiv 1$ is used to simplified the notations, and $x_k^0$ is not a component of $x_k$, for any $k = 0,...,N$.}
+Let $o_k \in \mathbb{R}^{n_k}, k = 1,...,N$ be the output vector of the $k^{th}$ layer. Then we define $h(u, x) = o_N$, based on setting $o_0 = x$ and the $j^{th}$ component of $o_k$ is given in an iterative fashion as~\footnote{$o_k^0 \equiv 1$ is used to simplify the notation, and $o_k^0$ is not a component of $o_k$, for any $k = 0,...,N$.}
\[\begin{alignedat}{5}
- x_k^j = \phi \left( \sum_{s=0}^{n_{k-1}} x_{k-1}^s u_{k-1}^{sj} \right), &\quad k = 1,...,N, \; j = 1,...,n_k
+ o_k^j = \phi \left( \sum_{s=0}^{n_{k-1}} o_{k-1}^s u_{k-1}^{sj} \right), &\quad k = 1,...,N, \; j = 1,...,n_k
\end{alignedat}\]
\paragraph{Gradient of the End Layer.}
Let's first handle $u_{N-1}^{st}, s = 0,...,n_{N-1}, t = 1,...,n_N$.
-Let $z^t$ denote the $t$th component of $z \in \mathbb{R}^{n_N}$, and $h^t$ the $t$th component of output of $h$.
+Let $y^t$ denote the $t^{th}$ component of $y \in \mathbb{R}^{n_N}$, and $h^t$ the $t^{th}$ component of output of $h$.
\[\begin{aligned}
\frac{\partial f}{\partial u_{N-1}^{st}}
- &= \left( h^t(u, y) - z^t \right) \cdot \frac{\partial h^t(u, y)}{\partial u_{N-1}^{st}} \\
- &= \left( x_N^t - z^t \right) \cdot \frac{\partial x_N^t}{\partial u_{N-1}^{st}} \\
- &= \left( x_N^t - z^t \right) \cdot \frac{\partial \phi \left( \sum_{s=0}^{n_{N-1}} x_{N-1}^s u_{N-1}^{st} \right)}{\partial u_{N-1}^{st}} \\
- &= \left( x_N^t - z^t \right) \cdot \phi' \left( \sum_{s=0}^{n_{N-1}} x_{N-1}^s u_{N-1}^{st} \right) \cdot x_{N-1}^s \\
+ &= \left( h^t(u, x) - y^t \right) \cdot \frac{\partial h^t(u, x)}{\partial u_{N-1}^{st}} \\
+ &= \left( o_N^t - y^t \right) \cdot \frac{\partial o_N^t}{\partial u_{N-1}^{st}} \\
+ &= \left( o_N^t - y^t \right) \cdot \frac{\partial \phi \left( \sum_{s=0}^{n_{N-1}} o_{N-1}^s u_{N-1}^{st} \right)}{\partial u_{N-1}^{st}} \\
+ &= \left( o_N^t - y^t \right) \cdot \phi' \left( \sum_{s=0}^{n_{N-1}} o_{N-1}^s u_{N-1}^{st} \right) \cdot o_{N-1}^s \\
\end{aligned}\]
-To ease the notation, let the input vector of the $j$th activation unit of the $(k+1)$th layer be
-\[\mathit{net}_k^j =\sum_{s=0}^{n_{k-1}} x_{k-1}^s u_{k-1}^{sj},\]
-where $k = 1,...,N, \; j = 1,...,n_k$, and note that $x_k^j =\phi(\mathit{net}_k^j)$. Finally, the gradient
-\[\frac{\partial f}{\partial u_{N-1}^{st}} = \left( x_N^t - z^t \right) \cdot \phi' ( \mathit{net}_N^t ) \cdot x_{N-1}^s\]
-For any $s = 0,...,n_{N-1}, t =1,...,n_N$, we are given $z^t$, and $x_N^t, \mathit{net}_N^t, x_{N-1}^s$ can be computed by forward iterating the network layer by layer (also called the feed-forward pass). Therefore, we now know how to compute the coefficients for the end layer $u_{N-1}^{st}, s = 0,...,n_{N-1}, t =1,...,n_N$.
+To ease the notation, let the input vector of the $j^{th}$ activation unit of the $(k+1)^{th}$ layer be
+\[\mathit{net}_k^j =\sum_{s=0}^{n_{k-1}} o_{k-1}^s u_{k-1}^{sj},\]
+where $k = 1,...,N, \; j = 1,...,n_k$, and note that $o_k^j =\phi(\mathit{net}_k^j)$. Finally, the gradient
+\[\frac{\partial f}{\partial u_{N-1}^{st}} = \left( o_N^t - y^t \right) \cdot \phi' ( \mathit{net}_N^t ) \cdot o_{N-1}^s\]
+For any $s = 0,...,n_{N-1}, t =1,...,n_N$, we are given $y^t$, and $o_N^t, \mathit{net}_N^t, o_{N-1}^s$ can be computed by forward iterating the network layer by layer (also called the feed-forward pass). Therefore, we now know how to compute the coefficients for the end layer $u_{N-1}^{st}, s = 0,...,n_{N-1}, t =1,...,n_N$.
\subsubsection{Backpropagation}
For inner (hidden) layers, it is more difficult to compute the partial derivative over the input of activation units (i.e., $\mathit{net}_k, k = 1,...,N-1$).
-That said, $\frac{\partial f}{\partial \mathit{net}_N^t} = (x_N^t - z^t) \phi'(\mathit{net}_N^t)$ is easy, where $t = 1,...,n_N$, but $\frac{\partial f}{\partial \mathit{net}_k^j}$ is hard, where $k = 1,...,N-1, j = 1,..,n_k$.
+That said, $\frac{\partial f}{\partial \mathit{net}_N^t} = (o_N^t - y^t) \phi'(\mathit{net}_N^t)$ is easy, where $t = 1,...,n_N$, but $\frac{\partial f}{\partial \mathit{net}_k^j}$ is hard, where $k = 1,...,N-1, j = 1,..,n_k$.
This hard-to-compute statistic is referred to as \textit{delta error}, and let $\delta_k^j = \frac{\partial f}{\partial \mathit{net}_k^j}$, where $k = 1,...,N-1, j = 1,..,n_k$.
If this is solved, the gradient can be easily computed as follow
-\[\frac{\partial f}{\partial u_{k-1}^{sj}} = \boxed{\frac{\partial f}{\partial \mathit{net}_k^j}} \cdot \frac{\partial \mathit{net}_k^j}{\partial u_{k-1}^{sj}} = \boxed{\delta_k^j} x_{k-1}^s,\]
+\[\frac{\partial f}{\partial u_{k-1}^{sj}} = \boxed{\frac{\partial f}{\partial \mathit{net}_k^j}} \cdot \frac{\partial \mathit{net}_k^j}{\partial u_{k-1}^{sj}} = \boxed{\delta_k^j} o_{k-1}^s,\]
where $k = 1,...,N-1, s = 0,...,n_{k-1}, j = 1,..,n_k$.
To solve this, we introduce the popular backpropagation below.
@@ -90,20 +97,20 @@ First,
\[
\delta_{k}^j
= \frac{\partial f}{\partial \mathit{net}_{k}^j}
- = \frac{\partial f}{\partial x_{k}^j} \cdot \frac{\partial x_{k}^j}{\partial \mathit{net}_{k}^j}
- = \frac{\partial f}{\partial x_{k}^j} \cdot \phi'(\mathit{net}_{k}^j)
+ = \frac{\partial f}{\partial o_{k}^j} \cdot \frac{\partial o_{k}^j}{\partial \mathit{net}_{k}^j}
+ = \frac{\partial f}{\partial o_{k}^j} \cdot \phi'(\mathit{net}_{k}^j)
\]
And here comes the only equation that is needed but the author, I (Aaron), do not understand but it looks reasonable and repeats in different online notes \cite{mlp_gradient_wisc},
\[\begin{alignedat}{5}
- \frac{\partial f}{\partial x_{k}^j} = \sum_{t=1}^{n_{k+1}} \left( \frac{\partial f}{\partial \mathit{net}_{k+1}^t} \cdot \frac{\partial \mathit{net}_{k+1}^t}{\partial x_{k}^j} \right),
+ \frac{\partial f}{\partial o_{k}^j} = \sum_{t=1}^{n_{k+1}} \left( \frac{\partial f}{\partial \mathit{net}_{k+1}^t} \cdot \frac{\partial \mathit{net}_{k+1}^t}{\partial o_{k}^j} \right),
&\quad k = 1,...,N-1, \: j = 1,...,n_{k}
\end{alignedat}\]
Assuming the above equation is true, we can solve delta error backward iteratively
\[\begin{aligned}
\delta_{k}^j
- &= \frac{\partial f}{\partial x_{k}^j} \cdot \phi'(\mathit{net}_{k}^j) \\
- &= \sum_{t=1}^{n_{k+1}} \left( \frac{\partial f}{\partial \mathit{net}_{k+1}^t} \cdot \frac{\partial \mathit{net}_{k+1}^t}{\partial x_{k}^j} \right) \cdot \phi'(\mathit{net}_{k}^j) \\
- &= \sum_{t=1}^{n_{k+1}} \left( \delta_{k+1}^t \cdot \frac{\partial \left( \sum_{s=0}^{n_{k}} x_{k}^s u_{k}^{st} \right) }{\partial x_{k}^j} \right) \cdot \phi'(\mathit{net}_{k}^j) \\
+ &= \frac{\partial f}{\partial o_{k}^j} \cdot \phi'(\mathit{net}_{k}^j) \\
+ &= \sum_{t=1}^{n_{k+1}} \left( \frac{\partial f}{\partial \mathit{net}_{k+1}^t} \cdot \frac{\partial \mathit{net}_{k+1}^t}{\partial o_{k}^j} \right) \cdot \phi'(\mathit{net}_{k}^j) \\
+ &= \sum_{t=1}^{n_{k+1}} \left( \delta_{k+1}^t \cdot \frac{\partial \left( \sum_{s=0}^{n_{k}} o_{k}^s u_{k}^{st} \right) }{\partial o_{k}^j} \right) \cdot \phi'(\mathit{net}_{k}^j) \\
&= \sum_{t=1}^{n_{k+1}} \left( \delta_{k+1}^t \cdot u_{k}^{jt} \right) \cdot \phi'(\mathit{net}_{k}^j) \\
\end{aligned}\]
To sum up, we need the following equation for error back propagation
@@ -111,20 +118,20 @@ To sum up, we need the following equation for error back propagation
where $k = 1,...,N-1$, and $j = 1,...,n_{k}$.
\subsubsection{The $\mathit{Gradient}$ Function}
-\begin{algorithm}[mlp-gradient$(u, y, z)$] \label{alg:mlp-gradient}
+\begin{algorithm}[mlp-gradient$(u, x, y)$] \label{alg:mlp-gradient}
\alginput{Coefficients $u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\}$,\\
-start vector $y \in \mathbb{R}^{n_0}$,\\
-end vector $z \in \mathbb{R}^{n_N}$,\\
+start vector $x \in \mathbb{R}^{n_0}$,\\
+end vector $y \in \mathbb{R}^{n_N}$,\\
activation unit $\phi : \mathbb{R} \to \mathbb{R}$}
\algoutput{Gradient value $\nabla f(u)$ that consists of components $\nabla f(u)_{k-1}^{sj} = \frac{\partial f}{\partial u_{k-1}^{sj}}$}
\begin{algorithmic}[1]
- \State $(\mathit{net}, x) \set$ \texttt{feed-forward}$(u, y, \phi)$
- \State $\delta_N \set$ \texttt{end-layer-delta-error}$(\mathit{net}, x, z, \phi')$
+ \State $(\mathit{net}, o) \set$ \texttt{feed-forward}$(u, x, \phi)$
+ \State $\delta_N \set$ \texttt{end-layer-delta-error}$(\mathit{net}, o, y, \phi')$
\State $\delta \set$ \texttt{error-back-propagation}$(\delta_N, \mathit{net}, u, \phi')$
\For{$k = 1,...,N$}
\For{$s = 0,...,n_{k-1}$}
\For{$j = 1,...,n_k$}
- \State $\nabla f(u)_{k-1}^{sj} \set \delta_k^j x_{k-1}^s$
+ \State $\nabla f(u)_{k-1}^{sj} \set \delta_k^j o_{k-1}^s$
\Comment{Can be put together with the computation of delta $\delta$}
\EndFor
\EndFor
@@ -138,46 +145,47 @@ Common examples of activation units are
\[\begin{alignedat}{3}
\phi(\xi) &= \frac{1}{1 + e^{-\xi}}, &\quad \text{ (logistic function),}\\
\phi(\xi) &= \frac{e^{\xi} - e^{-\xi}}{e^{\xi} + e^{-\xi}}, &\quad \text{ (hyperbolic tangent function)}\\
+\phi(\xi) &= \max(\xi, 0), &\quad \text{ (rectified linear function)}\\
\end{alignedat}\]
-\begin{algorithm}[feed-forward$(u, y, \phi)$] \label{alg:feed-forward}
+\begin{algorithm}[feed-forward$(u, x, \phi)$] \label{alg:feed-forward}
\alginput{Coefficients $u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\}$,\\
-input vector $y \in \mathbb{R}^{n_0}$,\\
+input vector $x \in \mathbb{R}^{n_0}$,\\
activation unit $\phi : \mathbb{R} \to \mathbb{R}$}
\algoutput{Input vectors $\mathit{net} = \{\mathit{net}_k^j \; | \; k = 1,...,N, \: j = 1,...,n_k\}$,\\
-output vectors $x = \{x_k^j \; | \; k = 0,...,N, \: j = 0,...,n_k\}$}
+output vectors $o = \{o_k^j \; | \; k = 0,...,N, \: j = 0,...,n_k\}$}
\begin{algorithmic}[1]
\For{$k = 0,...,N$}
- \State $x_k^0 \set 1$
+ \State $o_k^0 \set 1$
\EndFor
- \State $x_0 \set y$ \Comment{For all components $x_0^j, y^j, \; j = 1,...,n_0$}
+ \State $o_0 \set x$ \Comment{For all components $o_0^j, x^j, \; j = 1,...,n_0$}
\For{$k = 1,...,N$}
\For{$j = 1,...,n_k$}
\State $\mathit{net}_k^j \set 0$
\For{$s = 0,...,n_{k-1}$}
- \State $\mathit{net}_k^j \set \mathit{net}_k^j + x_{k-1}^s u_{k-1}^{sj}$
+ \State $\mathit{net}_k^j \set \mathit{net}_k^j + o_{k-1}^s u_{k-1}^{sj}$
\EndFor
- \State $x_k^j = \phi(\mathit{net}_k^j)$
+ \State $o_k^j = \phi(\mathit{net}_k^j)$ \Comment{Where the activation function for the final layer is identity for regression and softmax for classification.}
\EndFor
\EndFor
- \State \Return $(\mathit{net}, x)$
+ \State \Return $(\mathit{net}, o)$
\end{algorithmic}
\end{algorithm}
-\begin{algorithm}[end-layer-delta-error$(\mathit{net}, x, z, \phi')$] \label{alg:end-layer-delta-error}
+\clearpage
+\begin{algorithm}[end-layer-delta-error$(\mathit{net}, o, y, \phi')$] \label{alg:end-layer-delta-error}
\alginput{Input vectors $\mathit{net} = \{\mathit{net}_k^j \; | \; k = 1,...,N, \: j = 1,...,n_k\}$,\\
-output vectors $x = \{x_k^j \; | \; k = 0,...,N, \: j = 0,...,n_k\}$,\\
-end vector $z \in \mathbb{R}^{n_N}$,\\
+output vectors $o = \{o_k^j \; | \; k = 0,...,N, \: j = 0,...,n_k\}$,\\
+end vector $y \in \mathbb{R}^{n_N}$,\\
derivative of activation unit $\phi' : \mathbb{R} \to \mathbb{R}$}
\algoutput{End layer delta $\delta_N = \{\delta_N^t \; | \; t = 1,...,n_N\}$}
\begin{algorithmic}[1]
\For{$t = 1,...,n_N$}
- \State $\delta_N^t \set (x_N^t - z^t) \phi'(\mathit{net}_N^t)$
+ \State $\delta_N^t \set (o_N^t - y^t)$ \Comment{This applies for identity activation and mean square error loss and softmax activation with cross entropy loss}
\EndFor
\State \Return $\delta_N$
\end{algorithmic}
\end{algorithm}
-
\begin{algorithm}[error-back-propagation$(\delta_N, \mathit{net}, u, \phi')$] \label{alg:error-back-propagation}
\alginput{End layer delta $\delta_N = \{\delta_N^t \; | \; t = 1,...,n_N\}$,\\
input vectors $\mathit{net} = \{\mathit{net}_k^j \; | \; k = 1,...,N, \: j = 1,...,n_k\}$,\\
@@ -197,3 +205,45 @@ derivative of activation unit $\phi' : \mathbb{R} \to \mathbb{R}$}
\State \Return $\delta$
\end{algorithmic}
\end{algorithm}
+
+\begin{algorithm}[mlp-train-iteration$(X, Y, \eta)$] \label{alg:mlp-train-iteration}
+\alginput{
+start vectors $X_{1...m} \in \mathbb{R}^{n_0}$,\\
+end vectors $Y_{1...m} \in \mathbb{R}^{n_N}$,\\
+learning rate $\eta$,\\}
+\algoutput{Coefficients $u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\}$}
+\begin{algorithmic}[1]
+ \State \texttt{Randomly initialize u}
+ \For{$i = 1,...,m$}
+ \State $\nabla f(u) \set \texttt{mlp-gradient}(u,X_i,Y_i)$
+ \State $u \set u - \eta (\nabla f(u) + \lambda u)$
+ \EndFor
+ \State \Return $u$
+\end{algorithmic}
+\end{algorithm}
+
+\clearpage
+\begin{algorithm}[mlp-train-parallel$(X, Y, \eta, s, t)$] \label{alg:mlp-train-parallel}
+\alginput{
+start vectors $X_{1...m} \in \mathbb{R}^{n_0}$,\\
+end vectors $Y_{1...m} \in \mathbb{R}^{n_N}$,\\
+learning rate $\eta$,\\
+segments $s$,\\
+iterations $t$,\\}
+\algoutput{Coefficients $u = \{ u_{k-1}^{sj} \; | \; k = 1,...,N, \: s = 0,...,n_{k-1}, \: j = 1,...,n_k\}$}
+\begin{algorithmic}[1]
+ \State \texttt{Randomly initialize u}
+ \For{$j = 1,...,s$}
+ \State $X_j \set \texttt{subset-of-X}$
+ \State $Y_j \set \texttt{subset-of-Y}$
+ \EndFor
+ \For{$i = 1,...,t$}
+ \For{$j = 1,...,s$}
+ \State $u_j \set copy(u)$
+ \State $u_j \set \texttt{mlp-train-iteration}(X_j, Y_j, \eta)$
+ \EndFor
+ \State $u \set \texttt{weighted-avg}(u_{1...s})$
+ \EndFor
+ \State \Return $u$
+\end{algorithmic}
+\end{algorithm}
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/doc/literature.bib
----------------------------------------------------------------------
diff --git a/doc/literature.bib b/doc/literature.bib
index 225622d..6784f5e 100644
--- a/doc/literature.bib
+++ b/doc/literature.bib
@@ -953,4 +953,10 @@ Applied Survival Analysis},
@online{bfs_wikipedia,
title = {Breadth-first search},
url={https://en.wikipedia.org/wiki/Breadth-first_search}
-}
\ No newline at end of file
+}
+
+@misc{mlp_parallel,
+ Url = {https://www.microsoft.com/en-us/research/publication/accelerating-recurrent-neural-network-training-via-two-stage-classes-and-parallelization/},
+ Title = {{Accelerating Recurrent Neural Network Training via Two Stage Classes and Parallelization}},
+ Author = {{Zhiheng Huang}}
+}
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index ccf58a8..e27e14a 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -183,7 +183,7 @@ Contains graph algorithms.
@defgroup grp_crf Conditional Random Field
@ingroup grp_super
- @defgroup grp_mlp Multilayer Perceptron
+ @defgroup grp_nn Neural Network
@ingroup grp_super
@defgroup grp_regml Regression Models
@@ -202,7 +202,6 @@ Contains graph algorithms.
@defgroup grp_robust Robust Variance
@}
-
@defgroup grp_svm Support Vector Machines
@ingroup grp_super
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/mlp_igd.cpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/mlp_igd.cpp b/src/modules/convex/mlp_igd.cpp
index 3647d5f..9e9e665 100644
--- a/src/modules/convex/mlp_igd.cpp
+++ b/src/modules/convex/mlp_igd.cpp
@@ -29,6 +29,7 @@
#include "mlp_igd.hpp"
#include "task/mlp.hpp"
+#include "task/l2.hpp"
#include "algo/igd.hpp"
#include "algo/loss.hpp"
@@ -51,6 +52,8 @@ typedef Loss<MLPIGDState<MutableArrayHandle<double> >, MLPIGDState<ArrayHandle<d
typedef MLP<MLPModel<MutableArrayHandle<double> >,MLPTuple> MLPTask;
+typedef MLPModel<MutableArrayHandle<double> > MLPModelType;
+
/**
* @brief Perform the multilayer perceptron transition step
*
@@ -63,6 +66,7 @@ mlp_igd_transition::run(AnyType &args) {
// For other tuples: args[0] holds the computation state until last tuple
MLPIGDState<MutableArrayHandle<double> > state = args[0];
+
// initilize the state if first tuple
if (state.algo.numRows == 0) {
if (!args[3].isNull()) {
@@ -74,20 +78,30 @@ mlp_igd_transition::run(AnyType &args) {
} else {
// configuration parameters
ArrayHandle<double> numbersOfUnits = args[4].getAs<ArrayHandle<double> >();
+ int numberOfStages = numbersOfUnits.size() - 1;
double stepsize = args[5].getAs<double>();
- state.allocate(*this, numbersOfUnits.size() - 1,
+ state.allocate(*this, numberOfStages,
reinterpret_cast<const double *>(numbersOfUnits.ptr()));
state.task.stepsize = stepsize;
- int activation = args[6].getAs<int>();
-
- int is_classification = args[7].getAs<int>();
- state.task.model.initialize(is_classification, activation);
+ const int activation = args[6].getAs<int>();
+ const int is_classification = args[7].getAs<int>();
+
+ const bool warm_start = args[9].getAs<bool>();
+ const int n_tuples = args[11].getAs<int>();
+ const double lambda = args[12].getAs<double>();
+ state.task.lambda = lambda;
+ MLPTask::lambda = lambda;
+ double is_classification_double = (double) is_classification;
+ double activation_double = (double) activation;
+ MappedColumnVector coeff = args[10].getAs<MappedColumnVector>();
+ state.task.model.rebind(&is_classification_double,&activation_double,
+ &coeff.data()[0], numberOfStages,
+ &numbersOfUnits[0]);
}
-
// resetting in either case
state.reset();
}
@@ -96,25 +110,23 @@ mlp_igd_transition::run(AnyType &args) {
const uint16_t N = state.task.numberOfStages;
const double *n = state.task.numbersOfUnits;
+ MappedColumnVector x_means = args[13].getAs<MappedColumnVector>();
+ MappedColumnVector x_stds = args[14].getAs<MappedColumnVector>();
// tuple
- MappedColumnVector indVar;
+ ColumnVector indVar;
MappedColumnVector depVar;
try {
- // an exception is raised in the backend if args[2] contains nulls
- MappedColumnVector x = args[1].getAs<MappedColumnVector>();
- // x is a const reference, we can only rebind to change its pointer
- indVar.rebind(x.memoryHandle(), x.size());
+ indVar = (args[1].getAs<MappedColumnVector>()-x_means).cwiseQuotient(x_stds);
MappedColumnVector y = args[2].getAs<MappedColumnVector>();
depVar.rebind(y.memoryHandle(), y.size());
-
} catch (const ArrayWithNullException &e) {
return args[0];
}
MLPTuple tuple;
- tuple.indVar.rebind(indVar.memoryHandle(), indVar.size());
+ tuple.indVar = indVar;
tuple.depVar.rebind(depVar.memoryHandle(), depVar.size());
+ tuple.weight = args[8].getAs<double>();
- // Now do the transition step
MLPIGDAlgorithm::transition(state, tuple);
MLPLossAlgorithm::transition(state, tuple);
state.algo.numRows ++;
@@ -130,14 +142,12 @@ mlp_igd_merge::run(AnyType &args) {
MLPIGDState<MutableArrayHandle<double> > stateLeft = args[0];
MLPIGDState<ArrayHandle<double> > stateRight = args[1];
- // We first handle the trivial case where this function is called with one
- // of the states being the initial state
if (stateLeft.algo.numRows == 0) { return stateRight; }
else if (stateRight.algo.numRows == 0) { return stateLeft; }
- // Merge states together
MLPIGDAlgorithm::merge(stateLeft, stateRight);
MLPLossAlgorithm::merge(stateLeft, stateRight);
+
// The following numRows update, cannot be put above, because the model
// averaging depends on their original values
stateLeft.algo.numRows += stateRight.algo.numRows;
@@ -154,20 +164,17 @@ mlp_igd_final::run(AnyType &args) {
// a deep copy.
MLPIGDState<MutableArrayHandle<double> > state = args[0];
- // Aggregates that haven't seen any data just return Null.
if (state.algo.numRows == 0) { return Null(); }
- // finalizing
- MLPIGDAlgorithm::final(state);
-
- // Return the mean loss
+ L2<MLPModelType>::lambda = state.task.lambda;
state.algo.loss = state.algo.loss/static_cast<double>(state.algo.numRows);
+ state.algo.loss += L2<MLPModelType>::loss(state.task.model);
+ MLPIGDAlgorithm::final(state);
- // for stepsize tuning
- std::stringstream debug;
- debug << "loss: " << state.algo.loss;
- elog(INFO,"%s",debug.str().c_str());
- return state;
+ AnyType tuple;
+ tuple << state
+ << (double)state.algo.loss;
+ return tuple;
}
/**
@@ -191,10 +198,9 @@ internal_mlp_igd_result::run(AnyType &args) {
flattenU;
flattenU.rebind(&state.task.model.u[0](0, 0),
state.task.model.arraySize(state.task.numberOfStages,
- state.task.numbersOfUnits)-2); // -2 for is_classification and activation
+ state.task.numbersOfUnits));
double loss = state.algo.loss;
-
AnyType tuple;
tuple << flattenU
<< loss;
@@ -204,27 +210,25 @@ internal_mlp_igd_result::run(AnyType &args) {
AnyType
internal_predict_mlp::run(AnyType &args) {
MLPModel<MutableArrayHandle<double> > model;
- MappedColumnVector indVar;
+ ColumnVector indVar;
int is_response = args[5].getAs<int>();
+ MappedColumnVector x_means = args[6].getAs<MappedColumnVector>();
+ MappedColumnVector x_stds = args[7].getAs<MappedColumnVector>();
MappedColumnVector coeff = args[0].getAs<MappedColumnVector>();
MappedColumnVector layerSizes = args[4].getAs<MappedColumnVector>();
// Input layer doesn't count
size_t numberOfStages = layerSizes.size()-1;
- //#TODO this should be an int not a double
double is_classification = args[2].getAs<double>();
double activation = args[3].getAs<double>();
bool get_class = is_classification && is_response;
model.rebind(&is_classification,&activation,&coeff.data()[0],numberOfStages,&layerSizes.data()[0]);
try {
- MappedColumnVector x = args[1].getAs<MappedColumnVector>();
- // x is a const reference, we can only rebind to change its pointer
- indVar.rebind(x.memoryHandle(), x.size());
+ indVar = (args[1].getAs<MappedColumnVector>()-x_means).cwiseQuotient(x_stds);
} catch (const ArrayWithNullException &e) {
return args[0];
}
ColumnVector prediction = MLPTask::predict(model, indVar, get_class);
-
return prediction;
}
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/task/l2.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/task/l2.hpp b/src/modules/convex/task/l2.hpp
index a2e7f2f..308cfd9 100644
--- a/src/modules/convex/task/l2.hpp
+++ b/src/modules/convex/task/l2.hpp
@@ -84,7 +84,8 @@ double
L2<Model, Hessian>::loss(
const model_type &model) {
// 1/2 * lambda * || w ||^2
- return lambda * model.norm()*model.norm() / 2;
+ double norm = model.norm();
+ return lambda * norm*norm / 2;
}
} // namespace convex
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/task/mlp.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/task/mlp.hpp b/src/modules/convex/task/mlp.hpp
index e66492b..0032b81 100644
--- a/src/modules/convex/task/mlp.hpp
+++ b/src/modules/convex/task/mlp.hpp
@@ -26,6 +26,8 @@
#ifndef MADLIB_MODULES_CONVEX_TASK_MLP_HPP_
#define MADLIB_MODULES_CONVEX_TASK_MLP_HPP_
+#include <dbconnector/dbconnector.hpp>
+
namespace madlib {
namespace modules {
@@ -46,24 +48,26 @@ public:
static void gradientInPlace(
model_type &model,
- const independent_variables_type &y,
- const dependent_variable_type &z,
+ const independent_variables_type &x,
+ const dependent_variable_type &y,
const double &stepsize);
static double loss(
const model_type &model,
- const independent_variables_type &y,
- const dependent_variable_type &z);
+ const independent_variables_type &x,
+ const dependent_variable_type &y);
static ColumnVector predict(
const model_type &model,
- const independent_variables_type &y,
+ const independent_variables_type &x,
const bool get_class);
const static int RELU = 0;
const static int SIGMOID = 1;
const static int TANH = 2;
+ static double lambda;
+private:
static double sigmoid(const double &xi) {
return 1. / (1. + std::exp(-xi));
}
@@ -76,9 +80,6 @@ public:
return std::tanh(xi);
}
-
-private:
-
static double sigmoidDerivative(const double &xi) {
double value = sigmoid(xi);
return value * (1. - value);
@@ -95,59 +96,39 @@ private:
static void feedForward(
const model_type &model,
- const independent_variables_type &y,
+ const independent_variables_type &x,
std::vector<ColumnVector> &net,
- std::vector<ColumnVector> &x);
-
- static void endLayerDeltaError(
- const std::vector<ColumnVector> &net,
- const std::vector<ColumnVector> &x,
- const dependent_variable_type &z,
- ColumnVector &delta_N);
+ std::vector<ColumnVector> &o);
- static void errorBackPropagation(
- const ColumnVector &delta_N,
+ static void backPropogate(
+ const ColumnVector &y_true,
+ const ColumnVector &y_estimated,
const std::vector<ColumnVector> &net,
const model_type &model,
std::vector<ColumnVector> &delta);
};
template <class Model, class Tuple>
+double MLP<Model, Tuple>::lambda = 0;
+
+template <class Model, class Tuple>
void
MLP<Model, Tuple>::gradientInPlace(
model_type &model,
- const independent_variables_type &y,
- const dependent_variable_type &z,
+ const independent_variables_type &x,
+ const dependent_variable_type &y_true,
const double &stepsize) {
- (void) model;
- (void) z;
- (void) y;
- (void) stepsize;
- std::vector<ColumnVector> net;
- std::vector<ColumnVector> x;
- std::vector<ColumnVector> delta;
- ColumnVector delta_N;
-
- feedForward(model, y, net, x);
- endLayerDeltaError(net, x, z, delta_N);
- errorBackPropagation(delta_N, net, model, delta);
-
uint16_t N = model.u.size(); // assuming nu. of layers >= 1
- uint16_t k, s, j;
+ uint16_t k;
+ std::vector<ColumnVector> net, o, delta;
- std::vector<uint16_t> n; n.clear(); //nu. of units in each layer
+ feedForward(model, x, net, o);
+ backPropogate(y_true, o.back(), net, model, delta);
- n.push_back(model.u[0].rows() - 1);
- for (k = 1; k <= N; k ++) {
- n.push_back(model.u[k-1].cols() - 1);
- }
-
- for (k=1; k <= N; k++){
- for (s=0; s <= n[k-1]; s++){
- for (j=1; j <= n[k]; j++){
- model.u[k-1](s,j) -= stepsize * (delta[k](j) * x[k-1](s));
- }
- }
+ for (k=0; k < N; k++){
+ Matrix regularization = MLP<Model, Tuple>::lambda*model.u[k];
+ regularization.row(0).setZero(); // Do not update bias
+ model.u[k] -= stepsize * (o[k] * delta[k].transpose() + regularization);
}
}
@@ -155,54 +136,40 @@ template <class Model, class Tuple>
double
MLP<Model, Tuple>::loss(
const model_type &model,
- const independent_variables_type &y,
- const dependent_variable_type &z) {
+ const independent_variables_type &x,
+ const dependent_variable_type &y_true) {
// Here we compute the loss. In the case of regression we use sum of square errors
// In the case of classification the loss term is cross entropy.
- std::vector<ColumnVector> net;
- std::vector<ColumnVector> x;
-
- feedForward(model, y, net, x);
- double loss = 0.;
- uint16_t j;
-
- for (j = 1; j < z.rows() + 1; j ++) {
- if(model.is_classification){
- // Cross entropy: RHS term is negative
- loss -= z(j-1)*std::log(x.back()(j)) + (1-z(j-1))*std::log(1-x.back()(j));
- }else{
- double diff = x.back()(j) - z(j-1);
- loss += diff * diff;
- }
+ std::vector<ColumnVector> net, o;
+ feedForward(model, x, net, o);
+ ColumnVector y_estimated = o.back();
+
+ if(model.is_classification){
+ double clip = 1.e-10;
+ y_estimated = y_estimated.cwiseMax(clip).cwiseMin(1.-clip);
+ return - (y_true.array()*y_estimated.array().log()
+ + (-y_true.array()+1)*(-y_estimated.array()+1).log()).sum();
}
- if(!model.is_classification){
- loss /= 2.;
- }else{
- loss /= z.rows();
+ else{
+ return 0.5 * (y_estimated-y_true).squaredNorm();
}
- return loss;
}
template <class Model, class Tuple>
ColumnVector
MLP<Model, Tuple>::predict(
const model_type &model,
- const independent_variables_type &y,
- const bool get_class
- ) {
- (void) model;
- (void) y;
- std::vector<ColumnVector> net;
- std::vector<ColumnVector> x;
-
- feedForward(model, y, net, x);
- // Don't return the offset
- ColumnVector output = x.back().tail(x.back().size()-1);
- if(get_class){
+ const independent_variables_type &x,
+ const bool get_class) {
+ std::vector<ColumnVector> net, o;
+
+ feedForward(model, x, net, o);
+ ColumnVector output = o.back();
+ if(get_class){ // Return a length 1 array with the predicted index
int max_idx;
output.maxCoeff(&max_idx);
output.resize(1);
- output[0] = (double)max_idx;
+ output[0] = (double) max_idx;
}
return output;
}
@@ -212,113 +179,65 @@ template <class Model, class Tuple>
void
MLP<Model, Tuple>::feedForward(
const model_type &model,
- const independent_variables_type &y,
+ const independent_variables_type &x,
std::vector<ColumnVector> &net,
- std::vector<ColumnVector> &x){
- // meta data and x_k^0 = 1
- uint16_t k, j, s;
- uint16_t N = model.u.size(); // assuming >= 1
+ std::vector<ColumnVector> &o){
+ uint16_t k, N;
+ N = model.u.size(); // assuming >= 1
net.resize(N + 1);
- x.resize(N + 1);
-
- std::vector<uint16_t> n; n.clear();
- n.push_back(model.u[0].rows() - 1);
- x[0].resize(n[0] + 1);
- x[0](0) = 1.;
- for (k = 1; k <= N; k ++) {
- n.push_back(model.u[k-1].cols() - 1);
- net[k].resize(n[k] + 1);
- x[k].resize(n[k] + 1);
- // Bias
- x[k](0) = 1.;
- }
+ o.resize(N + 1);
+
+ double (*activation)(const double&);
+ if(model.activation==RELU)
+ activation = &relu;
+ else if(model.activation==SIGMOID)
+ activation = &sigmoid;
+ else
+ activation = &tanh;
- // y is a mapped parameter from DB, aligning with x here
- for (j = 1; j <= n[0]; j ++) { x[0](j) = y(j-1); }
+ o[0].resize(x.size()+1);
+ o[0] << 1.,x;
for (k = 1; k < N; k ++) {
- for (j = 1; j <= n[k]; j ++) {
- net[k](j) = 0.;
- for (s = 0; s <= n[k-1]; s ++) {
- net[k](j) += x[k-1](s) * model.u[k-1](s, j);
- }
- if(model.activation==RELU)
- x[k](j) = relu(net[k](j));
- else if(model.activation==SIGMOID)
- x[k](j) = sigmoid(net[k](j));
- else
- x[k](j) = tanh(net[k](j));
- }
+ net[k] = model.u[k-1].transpose() * o[k-1];
+ o[k] = ColumnVector(model.u[k-1].cols()+1);
+ o[k] << 1., net[k].unaryExpr(activation);
}
+ o[N] = model.u[N-1].transpose() * o[N-1];
- // output layer computation
- for (j = 1; j <= n[N]; j ++) {
- x[N](j) = 0.;
- for (s = 0; s <= n[N-1]; s ++) {
- x[N](j) += x[N-1](s) * model.u[N-1](s, j);
- }
- }
// Numerically stable calculation of softmax
- ColumnVector last_x = x[N].tail(n[N]);
if(model.is_classification){
- double max_x = last_x.maxCoeff();
- last_x = (last_x.array() - max_x).exp();
- last_x /= last_x.sum();
+ double max_x = o[N].maxCoeff();
+ o[N] = (o[N].array() - max_x).exp();
+ o[N] /= o[N].sum();
}
- x[N].tail(n[N]) = last_x;
}
template <class Model, class Tuple>
void
-MLP<Model, Tuple>::endLayerDeltaError(
- const std::vector<ColumnVector> &net,
- const std::vector<ColumnVector> &x,
- const dependent_variable_type &z,
- ColumnVector &delta_N) {
- //meta data
- uint16_t t;
- uint16_t N = x.size() - 1; // assuming >= 1
- uint16_t n_N = x[N].rows() - 1;
- delta_N.resize(n_N + 1);
-
- for (t = 1; t <= n_N; t ++) {
- delta_N(t) = (x[N](t) - z(t-1));
- }
-}
-
-template <class Model, class Tuple>
-void
-MLP<Model, Tuple>::errorBackPropagation(
- const ColumnVector &delta_N,
+MLP<Model, Tuple>::backPropogate(
+ const ColumnVector &y_true,
+ const ColumnVector &y_estimated,
const std::vector<ColumnVector> &net,
const model_type &model,
std::vector<ColumnVector> &delta) {
- // meta data
- uint16_t k, j, t;
- uint16_t N = model.u.size(); // assuming >= 1
- delta.resize(N + 1);
-
- std::vector<uint16_t> n; n.clear();
- n.push_back(model.u[0].rows() - 1);
- for (k = 1; k <= N; k ++) {
- n.push_back(model.u[k-1].cols() - 1);
- delta[k].resize(n[k]+1);
- }
- delta[N] = delta_N;
-
+ uint16_t k, N;
+ N = model.u.size(); // assuming >= 1
+ delta.resize(N);
+
+ double (*activationDerivative)(const double&);
+ if(model.activation==RELU)
+ activationDerivative = &reluDerivative;
+ else if(model.activation==SIGMOID)
+ activationDerivative = &sigmoidDerivative;
+ else
+ activationDerivative = &tanhDerivative;
+
+ delta.back() = y_estimated - y_true;
for (k = N - 1; k >= 1; k --) {
- for (j = 0; j <= n[k]; j ++) {
- delta[k](j) = 0.;
- for (t = 1; t <= n[k+1]; t ++) {
- delta[k](j) += delta[k+1](t) * model.u[k](j, t);
- }
- if(model.activation==RELU)
- delta[k](j) = delta[k](j) * reluDerivative(net[k](j));
- else if(model.activation==SIGMOID)
- delta[k](j) = delta[k](j) * sigmoidDerivative(net[k](j));
- else
- delta[k](j) = delta[k](j) * tanhDerivative(net[k](j));
- }
+ // Do not include the bias terms
+ delta[k-1] = model.u[k].bottomRows(model.u[k].rows()-1) * delta[k];
+ delta[k-1] = delta[k-1].array() * net[k].unaryExpr(activationDerivative).array();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/type/model.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/model.hpp b/src/modules/convex/type/model.hpp
index 9b68af8..679dab4 100644
--- a/src/modules/convex/type/model.hpp
+++ b/src/modules/convex/type/model.hpp
@@ -121,51 +121,9 @@ struct MLPModel {
const double *n = inNumbersOfUnits;
size_t k;
for (k = 1; k <= N; k ++) {
- size += (n[k-1] + 1) * (n[k] + 1);
- }
- return 1 + // is_classification
- 1 + // activation
- size; // weights (u)
- }
-
- /**
- * @brief Initialize the model randomly
- */
- void initialize(int is_classification_in, int activation_in) {
- is_classification = is_classification_in;
- activation = activation_in;
- // using madlib::dbconnector::$database::NativeRandomNumberGenerator
- NativeRandomNumberGenerator rng;
-
- // Scaling factor for weight initialization
- double epsilon = 0.0001;
-
-
- double base = rng.min();
- double span = rng.max() - base;
-
- uint16_t N = u.size(); // assuming nu. of layers >= 1
- uint16_t k, s, j;
-
- std::vector<uint16_t> n; n.clear(); //nu. of units in each layer
-
- n.push_back(u[0].rows() - 1);
- for (k = 1; k <= N; k ++) {
- n.push_back(u[k-1].cols() - 1);
- }
-
- for (k=1; k <= N; k++){
- for (s=0; s <= n[k-1]; s++){
- u[k-1](s,0)=1;
- for (j=1; j <= n[k]; j++){
- // Generate normal(0,epsilon) value using Box-Muller transform
- double u1 = (rng()-base)/span;
- double u2 = (rng()-base)/span;
- double z = std::sqrt(-2*std::log(u1))*std::cos(2*M_PI*u2);
- u[k-1](s,j) = epsilon*z;
- }
- }
+ size += (n[k-1] + 1) * (n[k]);
}
+ return size; // weights (u)
}
uint32_t rebind(const double *is_classification_in,
@@ -185,20 +143,38 @@ struct MLPModel {
for (k = 1; k <= N; k ++) {
u.push_back(Eigen::Map<Matrix >(
const_cast<double*>(data + sizeOfU),
- n[k-1] + 1, n[k] + 1));
- sizeOfU += (n[k-1] + 1) * (n[k] + 1);
+ n[k-1] + 1, n[k]));
+ sizeOfU += (n[k-1] + 1) * (n[k]);
}
return sizeOfU;
}
+ double norm() const {
+ double norm = 0.;
+ size_t k;
+ for (k = 0; k < u.size(); k ++) {
+ norm+=u[k].bottomRows(u[k].rows()-1).squaredNorm();
+ }
+ return std::sqrt(norm);
+ }
+
+ void setZero(){
+ size_t k;
+ for (k = 1; k <= u.size(); k ++) {
+ u[k-1].setZero();
+ }
+ }
+
/*
* Some operator wrappers for u.
*/
MLPModel &operator*=(const double &c) {
+ // Note that when scaling the model, you should
+ // not update the bias.
size_t k;
for (k = 1; k <= u.size(); k ++) {
- u[k-1] *= c;
+ u[k-1] *= c;
}
return *this;
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/type/state.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/state.hpp b/src/modules/convex/type/state.hpp
index 66f5023..2cb2643 100644
--- a/src/modules/convex/type/state.hpp
+++ b/src/modules/convex/type/state.hpp
@@ -629,6 +629,9 @@ public:
return 1 // numberOfStages = N
+ (inNumberOfStages + 1) // numbersOfUnits: size is (N + 1)
+ 1 // stepsize
+ + 1 // lambda
+ + 1 // is_classification
+ + 1 // activation
+ sizeOfModel // model
+ 1 // numRows
@@ -645,17 +648,16 @@ private:
* - 0: numberOfStages (number of stages (layers), design doc: N)
* - 1: numbersOfUnits (numbers of activation units, design doc: n_0,...,n_N)
* - N + 2: stepsize (step size of gradient steps)
- * - N + 3: is_classification (do classification)
- * - N + 4: activation (activation function)
- * - N + 5: coeff (coefficients, design doc: u)
+ * - N + 3: lambda (regularization term)
+ * - N + 4: is_classification (do classification)
+ * - N + 5: activation (activation function)
+ * - N + 6: coeff (coefficients, design doc: u)
*
* Intra-iteration components (updated in transition step):
* sizeOfModel = # of entries in u + 2, (\sum_1^N n_{k-1} n_k)
- * - N + 3 + sizeOfModel: numRows (number of rows processed in this iteration)
- * - N + 4 + sizeOfModel: loss (loss value, the sum of squared errors)
- * - N + 5 + sizeOfModel: is_classification (do classification)
- * - N + 6 + sizeOfModel: activation (activation function)
- * - N + 7 + sizeOfModel: coeff (volatile model for incrementally update)
+ * - N + 6 + sizeOfModel: coeff (volatile model for incrementally update)
+ * - N + 6 + 2*sizeOfModel: numRows (number of rows processed in this iteration)
+ * - N + 7 + 2*sizeOfModel: loss (loss value, the sum of squared errors)
*/
void rebind() {
task.numberOfStages.rebind(&mStorage[0]);
@@ -663,13 +665,14 @@ private:
task.numbersOfUnits =
reinterpret_cast<dimension_pointer_type>(&mStorage[1]);
task.stepsize.rebind(&mStorage[N + 2]);
- uint32_t sizeOfModel = task.model.rebind(&mStorage[N + 3],&mStorage[N + 4],&mStorage[N + 5],
+ task.lambda.rebind(&mStorage[N + 3]);
+ uint32_t sizeOfModel = task.model.rebind(&mStorage[N + 4],&mStorage[N + 5],&mStorage[N + 6],
task.numberOfStages, task.numbersOfUnits);
- algo.numRows.rebind(&mStorage[N + 5 + sizeOfModel]);
- algo.loss.rebind(&mStorage[N + 6 + sizeOfModel]);
- algo.incrModel.rebind(&mStorage[N + 3],&mStorage[N + 4],&mStorage[N + 7 + sizeOfModel],
+ algo.incrModel.rebind(&mStorage[N + 4],&mStorage[N + 5],&mStorage[N + 6 + sizeOfModel],
task.numberOfStages, task.numbersOfUnits);
+ algo.numRows.rebind(&mStorage[N + 6 + 2*sizeOfModel]);
+ algo.loss.rebind(&mStorage[N + 7 + 2*sizeOfModel]);
}
@@ -685,13 +688,14 @@ public:
dimension_type numberOfStages;
dimension_pointer_type numbersOfUnits;
numeric_type stepsize;
+ numeric_type lambda;
MLPModel<Handle> model;
} task;
struct AlgoState {
+ MLPModel<Handle> incrModel;
count_type numRows;
numeric_type loss;
- MLPModel<Handle> incrModel;
} algo;
};
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/modules/convex/type/tuple.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/type/tuple.hpp b/src/modules/convex/type/tuple.hpp
index 4b9c55e..824ed90 100644
--- a/src/modules/convex/type/tuple.hpp
+++ b/src/modules/convex/type/tuple.hpp
@@ -64,7 +64,7 @@ typedef ExampleTuple<MappedColumnVector, double> GLMTuple;
// madlib::modules::convex::MatrixIndex
typedef ExampleTuple<MatrixIndex, double> LMFTuple;
-typedef ExampleTuple<MappedColumnVector, MappedColumnVector> MLPTuple;
+typedef ExampleTuple<ColumnVector, MappedColumnVector> MLPTuple;
} // namespace convex
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ff1b0f88/src/ports/postgres/modules/convex/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp.sql_in b/src/ports/postgres/modules/convex/mlp.sql_in
index 400f892..6b9d828 100644
--- a/src/ports/postgres/modules/convex/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/mlp.sql_in
@@ -29,23 +29,23 @@
m4_include(`SQLCommon.m4')
/**
-@addtogroup grp_mlp
+@addtogroup grp_nn
<div class="toc"><b>Contents</b><ul>
<li class="level1"><a href="#mlp_classification">Classification</a></li>
<li class="level1"><a href="#mlp_regression">Regression</a></li>
-<li class="level1"><a href="#optimization_params">Optimizer Parameters</a></li>
-<li class="level1"><a href="#predict">Prediction Functions/a></li>
+<li class="level1"><a href="#optimizer_params">Optimizer Parameters</a></li>
+<li class="level1"><a href="#predict">Prediction Functions</a></li>
<li class="level1"><a href="#example">Examples</a></li>
<li class="level1"><a href="#background">Technical Background</a></li>
<li class="level1"><a href="#literature">Literature</a></li>
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
-Multilayer Perceptron (MLP) is a model for regression and
-classification.
+Multilayer Perceptron (MLP) is a type of neural network that can be
+used for regression and classification.
-Also called "vanilla neural networks", they consist of several
+Also called "vanilla neural networks", MLPs consist of several
fully connected hidden layers with non-linear activation
functions. In the case of classification, the final layer of the
neural net has as many nodes as classes, and the output of the
@@ -67,7 +67,8 @@ mlp_classification(
dependent_varname,
hidden_layer_sizes,
optimizer_params,
- activation
+ activation,
+ weights
)
</pre>
\b Arguments
@@ -75,6 +76,7 @@ mlp_classification(
<DT>source_table</DT>
<DD>TEXT. Name of the table containing the training data.</DD>
+
<DT>output_table</DT>
<DD>TEXT. Name of the output table containing the model. Details of the output
tables are provided below.
@@ -83,19 +85,22 @@ mlp_classification(
<DT>independent_varname</DT>
<DD>TEXT. Expression list to evaluate for the
independent variables. An intercept variable should not be included as part
- of this expression. Please note that expression should be able to be cast
- to DOUBLE PRECISION[].
+ of this expression. <b>Please note that expression should be encoded properly.</b>
+ All values are cast to DOUBLE PRECISION, so categorical variables should be
+ one-hot or dummy encoded. See <a href="group__grp__encode__categorical.html">here</a>
+ for more details.
</DD>
+
<DT>dependent_varname</DT>
<DD> TEXT. Name of the dependent variable column. For classification, supported types are:
text, varchar, character varying, char, character
integer, smallint, bigint, and boolean. </DD>
- <DT>hidden_layer_sizes (optional)</DT>
- <DD>INTEGER[], default: ARRAY[].
+ <DT>hidden_layer_sizes </DT>
+ <DD>INTEGER[].
The number of neurons in each hidden layer. The length of this array will
- determine the number of hidden layers. Empty for no hidden layers.
+ determine the number of hidden layers. NULL for no hidden layers.
</DD>
@@ -111,6 +116,25 @@ mlp_classification(
'relu', and 'tanh'. The text can be any prefix of the three
strings; for e.g., activation='s' will use the sigmoid activation.
</DD>
+
+
+ <DT>weights (optional)</DT>
+ <DD>TEXT, default: NULL.
+ Weights for input rows. Column name which specifies the weight for each input row.
+ This weight will be incorporated into the update during SGD, and will not be used
+ for loss calculations. If not specified, weight for each row will default to 1.
+ Column should be a numeric type.
+ </DD>
+
+ <DT>warm_start (optional)</DT>
+ <DD>BOOLEAN, default: FALSE.
+ Initialize weights with the coefficients from the last call. If true, weights will
+ be initialized from output_table. Note that all parameters other than optimizer_params
+ and verbose must remain constant between calls when warm_start is used.
+ </DD>
+
+ <DT>verbose (optional)</DT>
+ <DD>BOOLEAN, default: FALSE. Provides verbose output of the results of training.</DD>
</DL>
<b>Output tables</b>
@@ -142,24 +166,28 @@ A summary table named \<output_table\>_summary is also created, which has the fo
<td>The source table.</td>
</tr>
<tr>
- <th>dependent_varname</th>
- <td>The dependent variable.</td>
- </tr>
- <tr>
<th>independent_varname</th>
<td>The independent variables.</td>
</tr>
<tr>
+ <th>dependent_varname</th>
+ <td>The dependent variable.</td>
+ </tr>
+ <tr>
<th>tolerance</th>
<td>The tolerance as given in optimizer_params.</td>
</tr>
<tr>
- <th>step_size</th>
- <td>The step size as given in optimizer_params.</td>
+ <th>learning_rate_init</th>
+ <td>The initial learning rate as given in optimizer_params.</td>
+ </tr>
+ <tr>
+ <th>learning_rate_policy</th>
+ <td>The learning rate policy as given in optimizer_params.</td>
</tr>
<tr>
<th>n_iterations</th>
- <td>The number of iterations run</td>
+ <td>The number of iterations run.</td>
</tr>
<tr>
<th>n_tries</th>
@@ -170,17 +198,29 @@ A summary table named \<output_table\>_summary is also created, which has the fo
<td>The number of units in each layer including the input and output layer.</td>
</tr>
<tr>
- <th>activation_function</th>
+ <th>activation</th>
<td>The activation function.</td>
</tr>
<tr>
<th>is_classification</th>
<td>True if the model was trained for classification, False if it was trained
- for regression</td>
+ for regression.</td>
</tr>
<tr>
<th>classes</th>
- <td>The classes which were trained against (empty for regression)</td>
+ <td>The classes which were trained against (empty for regression).</td>
+ </tr>
+ <tr>
+ <th>weights</th>
+ <td>The weight column used during training.</td>
+ </tr>
+ <tr>
+ <th>x_means</th>
+ <td>The mean for all input features (used for normalization).</td>
+ </tr>
+ <tr>
+ <th>x_stds</th>
+ <td>The standard deviation for all input features (used for normalization).</td>
</tr>
</table>
@@ -197,7 +237,9 @@ mlp_regression(source_table,
dependent_varname,
hidden_layer_sizes,
optimizer_params,
- activation
+ activation,
+ weights,
+ verbose
)
</pre>
@@ -205,7 +247,7 @@ mlp_regression(source_table,
Specifications for regression are largely the same as for classification. In the
model table, the loss will refer to mean square error instead of cross entropy. In the
-summary table, there is classes column. The following
+summary table, there is no classes column. The following
arguments have specifications which differ from mlp_classification:
<DL class="arglist">
<DT>dependent_varname</DT>
@@ -226,7 +268,7 @@ the parameter is ignored.
<pre class="syntax">
- 'step_size = <value>,
+ 'learning_rate_init = <value>,
n_iterations = <value>,
n_tries = <value>,
tolerance = <value>'
@@ -234,27 +276,57 @@ the parameter is ignored.
\b Optimizer Parameters
<DL class="arglist">
-<DT>step_size</dt>
-<DD>Default: [0.001].
+<DT>learning_rate_init</dt>
+<DD>Default: 0.001.
Also known as the learning rate. A small value is usually desirable to
ensure convergence, while a large value provides more room for progress during
training. Since the best value depends on the condition number of the data, in
practice one often tunes this parameter.
</DD>
+<DT>learning_rate_policy</dt>
+<DD>Default: constant.
+One of 'constant', 'exp', 'inv' or 'step' or any prefix of these.
+'constant': learning_rate = learning_rate_init
+'exp': learning_rate = learning_rate_init * gamma^(iter)
+'inv': learning_rate = learning_rate_init * (iter+1)^(-power)
+'step': learning_rate = learning_rate_init * gamma^(floor(iter/iterations_per_step))
+Where iter is the current iteration of SGD.
+</DD>
+
+<DT>gamma</dt>
+<DD>Default: 0.1.
+Decay rate for learning rate when learning_rate_policy is 'exp' or 'step'.
+</DD>
+
+<DT>power</dt>
+<DD>Default: 0.5.
+Exponent for learning_rate_policy = 'inv'.
+</DD>
+
+<DT>iterations_per_step</dt>
+<DD>Default: 100.
+Number of iterations to run before decreasing the learning rate by
+a factor of gamma. Valid for learning_rate_policy = 'step'.
+</DD>
<DT>n_iterations</dt>
<DD>Default: [100]. The maximum number of iterations allowed.
</DD>
+
<DT>n_tries</dt>
<DD>Default: [1]. Number of times to retrain the network with randomly initialized
-weights
+weights.
+</DD>
+
+<DT>lambda</dt>
+<DD>Default: 0. The regularization coefficient for L2 regularization.
</DD>
<DT>tolerance</dt>
<DD>Default: 0.001. The criterion to end iterations. The training stops whenever
-<the difference between the training models of two consecutive iterations is
-<smaller than \e tolerance or the iteration number is larger than \e max_iter.
+the difference between the training models of two consecutive iterations is
+smaller than \e tolerance or the iteration number is larger than \e n_iterations.
</DD>
</DL>
@@ -293,19 +365,19 @@ table name is already in use, then an error is returned. Table contains:</DD>
<td>Gives the 'id' for each prediction, corresponding to each row from the data_table.</td>
</tr>
<tr>
- <th>estimated_<COL_NAME></th>
+ <th>estimated_COL_NAME</th>
<td>
(For pred_type='response') The estimated class
for classification or value for regression, where
- <COL_NAME> is the name of the column to be
- predicted from training data
+ COL_NAME is the name of the column to be
+ predicted from training data.
</td>
</tr>
<tr>
- <th>prob_<CLASS></th>
+ <th>prob_CLASS</th>
<td>
(For pred_type='prob' for classification) The
- probability of a given class <CLASS> as given by
+ probability of a given class CLASS as given by
softmax. There will be one column for each class
in the training data.
</td>
@@ -315,10 +387,10 @@ table name is already in use, then an error is returned. Table contains:</DD>
<DT>pred_type</DT>
<DD>TEXT.
-the type of output requested:
+The type of output requested:
'response' gives the actual prediction,
'prob' gives the probability of each class.
-for regression, only type='response' is defined.
+For regression, only type='response' is defined.
The name of the id column in the input table.</DD>
</DL>
</table>
@@ -363,30 +435,36 @@ The model will be written to mlp_model.
<pre class="example">
DROP TABLE IF EXISTS mlp_model;
DROP TABLE IF EXISTS mlp_model_summary;
+-- Set seed so results are reproducible
+SELECT setseed(0);
SELECT madlib.mlp_classification(
'iris_data', -- Source table
'mlp_model', -- Destination table
'attributes', -- Input features
'class_text', -- Label
ARRAY[5], -- Number of units per layer
- 'step_size=0.003,
- n_iterations=5000,
+ 'learning_rate_init=0.003,
+ n_iterations=500,
tolerance=0', -- Optimizer params
- 'tanh'); -- Activation function
+ 'tanh', -- Activation function
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+);
</pre>
-# View the result for the model.
<pre class="example">
-- Set extended display on for easier reading of output
\\x ON
--- Neural net Initialization is non-deterministic, so your results may vary
+-- Results may vary depending on platform
SELECT * FROM mlp_model;
</pre>
Result:
<pre class="result">
--[ RECORD 1 ]--+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-coeff | {1,1,1,1,1,0.136374930803,0.188739676875,0.662387810001,-1.03381622734,-0.469961067046,0.0614006983397,0.0811504589436,0.299008228258,-0.47391918521,-0.215098143699,0.10519213944,0.145844617525,0.511683525606,-0.800215552382,-0.36417142683,0.120751709056,0.167531106521,0.587074895969,-0.916946198095,-0.417055067449,0.0539541885146,0.0694359704131,0.262598585854,-0.419234805076,-0.189915344282,1,1,1,1,1,1,0.105645702152,1.46247470474,0.484457903226,0.965962824478,1.19361986431,0.419805760087,-0.105696503487,-1.46245956666,-0.484427811691,-0.965730981426,-1.19365280555,-0.419973628863}
-loss | 0.0184092375519
-num_iterations | 5000
+-[ RECORD 1 ]--+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+coeff | {-0.172392477419,-0.0836446652758,-0.0162194484142,-0.647268294231,-0.504884325538,0.184825723596,0.351728174731,-0.601148967035,0.720999542651,0.26521898248,0.245760922013,0.264645322438,-0.349957739904,0.797653395667,0.725747963566,-0.344498001796,0.261481840947,0.329074383545,0.379503434339,-0.267398086353,-0.0238069072658,0.330239268187,-0.178736289201,-0.0563356339946,-0.0333791780453,0.262137386864,0.491390436498,-1.02635831573,-1.29541478382,0.246017274,-0.0623575215434,0.0826297373887,-0.671671189842,0.853494672576,1.21671423502,0.296424359217,0.15294606861}
+loss | 0.0136695756314
+num_iterations | 500
</pre>
-# Next train a regression example. First create some test data. This dataset
contains housing prices data.
@@ -419,30 +497,36 @@ COPY lin_housing (x, grp_by_col, y) FROM STDIN NULL '?' DELIMITER '|';
<pre class="example">
DROP TABLE IF EXISTS mlp_regress;
DROP TABLE IF EXISTS mlp_regress_summary;
+SELECT setseed(0);
SELECT madlib.mlp_regression(
- 'lin_housing', -- Source table
- 'mlp_regress', -- Desination table
- 'x', -- Input features
- 'y', -- Dependent variable
- ARRAY[5,5], -- Number of units per layer
- 'step_size=0.000007,
- n_iterations=10000,
+ 'lin_housing', -- Source table
+ 'mlp_regress', -- Destination table
+ 'x', -- Input features
+ 'y', -- Dependent variable
+ ARRAY[25,25], -- Number of units per layer
+ 'learning_rate_init=0.001,
+ n_iterations=500,
+ lambda=0.001,
tolerance=0',
- 'relu');
+ 'relu',
+ NULL, -- Default weight (1)
+ FALSE, -- No warm start
+ TRUE -- Verbose
+);
</pre>
-# Check the results of the model
<pre class="example">
--- Set extended display on for easier reading of output
+-- Set extended display on for easier reading of output.
\\x ON
--- Neural net Initialization is non-deterministic, so your results may vary
+-- Results may vary depending on platform.
SELECT * FROM mlp_regress;
</pre>
Result:
<pre class="result">
--[ RECORD 1 ]--+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------
-coeff | {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2.79506311399e-05,3.56715008915e-05,-6.09333559685e-05,0.000251228318768,-0.000224772841379,-3.71863030857e-05,-3.5757865148e-06,5.27936784854e-05,-2.48474166186e-05,6.19731184294e-05,3.07638968743e-05,6.8964698578e-06,0.000106016701083,-1.71484730318e-05,1.18691881812e-05,-0.000163975464208,0.000170026304906,3.11688265279e-05,0.000177050148787,-1.58265976603e-05,2.70144422657e-05,0.000112667883422,3.77575139073e-05,8.12474658795e-05,-7.90458917626e-05,0.000107566386158,-2.63771171506e-06,2.47996880915e-05,-0.00012642310887,0.000203827391081,0.000139315565565,4.86147243454e-05,-0.000176126471913,-6.47820782916e-05,-8.51592776447e-06,-6.60601176758e-05,2.91421874156e-05,6.3556873752e-05,0.000197557443129,0.000220531367259,0.000135036310289,0.000143735913975,-4.75034117786e-05,-0.000179547345838,-1.6919846786e-05,0.000162784312994,0.000268595819851,-0.000460066553287,8.69756071591e-05,-0.00311762727057,0.000126024763103,0.000205988242921
,0.003463432426,-0.00729789075286,0.00151625867549,-0.000890852767597,-0.00525016037249,0.0031043106659,0.00798041103839,-0.00552693050079,0.0232180415786,0.0230489850143,-0.0437890272341,0.0165765426407,-0.248554261758,-7.81336427846e-05,0.00558145591752,0.283465844585,-0.571699956182,0.133474351994,-0.0785181945605,-0.419269930709,0.249547772912,0.631761009875,-0.431305975666,1,1,1,1,1,1,0.0158747497572,-9.02809160806e-05,0.00015574347618,4.10805373863e-06,0.00121532434965,0.101790351335,0.0647558401493,-0.00013654998677,-9.92872075948e-06,-5.5319694394e-05,0.00519320756484,0.412736586036,0.0011998026977,-1.53688189815e-05,1.94817888201e-05,-4.63111489966e-05,7.24547899029e-05,0.00880394144485,5.45309822095e-05,-0.000140943219275,-7.96211486227e-05,-1.04337307472e-05,0.000161936762028,0.00136273797767,-4.54737243585e-05,-3.4083840736e-05,3.69286883662e-05,9.9047243188e-08,3.75014011824e-06,-9.45366086368e-08,1,1,1,1,1,1,6.67488547054,0.102754199001,0.41668912471,0.00886867296479,0
.00136206007228,-9.88642499013e-05}
-loss | 144.965776158
-num_iterations | 10000
+-[ RECORD 1 ]--+-----------------------------------------------------------------------------------
+coeff | {-0.135647108464,0.0315402969485,-0.117580589352,-0.23084537701,-0.10868726702...
+loss | 0.114125125042
+num_iterations | 500
</pre>
-# Now let's look at the prediction functions. In the following examples we will
use the training data set for prediction as well, which is not usual but serves to
@@ -458,8 +542,6 @@ SELECT madlib.mlp_predict(
'mlp_prediction', -- Output table for predictions
'response' -- Output classes, not probabilities
);
--# View results
-<pre class="example">
SELECT * FROM mlp_prediction JOIN iris_data USING (id);
</pre>
Result for the classification model:
@@ -487,7 +569,7 @@ Result for the classification model:
19 | Iris-versicolor | {6.6,2.9,4.6,1.3} | Iris-versicolor | 2
20 | Iris-versicolor | {5.2,2.7,3.9,1.4} | Iris-versicolor | 2
</pre>
-Prediction using the regression model:
+-# Prediction using the regression model:
<pre class="example">
DROP TABLE IF EXISTS mlp_regress_prediction;
SELECT madlib.mlp_predict(
@@ -498,34 +580,35 @@ SELECT madlib.mlp_predict(
'response' -- Output values, not probabilities
);
</pre>
--# View results
+View results
<pre class="example">
SELECT * FROM lin_housing JOIN mlp_regress_prediction USING (id);
</pre>
Result for the regression model:
<pre class="result">
- id | x | grp_by_col | y | estimated_y
-----+-------------------------------------------------------------------------+------------+------+--------------------
- 1 | {1,0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98} | 1 | 24 | {23.2627062018087}
- 2 | {1,0.02731,0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14} | 1 | 21.6 | {25.7088419115781}
- 3 | {1,0.02729,0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03} | 1 | 34.7 | {27.5587003901404}
- 4 | {1,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94} | 1 | 33.4 | {31.1812237427816}
- 5 | {1,0.06905,0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33} | 1 | 36.2 | {30.3696873085477}
- 6 | {1,0.02985,0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21} | 1 | 28.7 | {29.5290259241882}
- 7 | {1,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43} | 1 | 22.9 | {21.1576051716888}
- 8 | {1,0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15} | 1 | 27.1 | {17.6194200563055}
- 9 | {1,0.21124,12.5,7.87,0,0.524,5.631,100,6.0821,5,311,15.2,386.63,29.93} | 1 | 16.5 | {15.1366297774139}
-10 | {1,0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1} | 1 | 18.9 | {17.6528662199369}
-11 | {1,0.22489,12.5,7.87,0,0.524,6.377,94.3,6.3467,5,311,15.2,392.52,20.45} | 1 | 15 | {17.2017487668181}
-12 | {1,0.11747,12.5,7.87,0,0.524,6.009,82.9,6.2267,5,311,15.2,396.9,13.27} | 1 | 18.9 | {19.4893860319992}
-13 | {1,0.09378,12.5,7.87,0,0.524,5.889,39,5.4509,5,311,15.2,390.5,15.71} | 1 | 21.7 | {23.2917226708039}
-14 | {1,0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26} | 1 | 20.4 | {22.8904812605193}
-15 | {1,0.63796,0,8.14,0,0.538,6.096,84.5,4.4619,4,307,21,380.02,10.26} | 1 | 18.2 | {18.2386754423677}
-16 | {1,0.62739,0,8.14,0,0.538,5.834,56.5,4.4986,4,307,21,395.62,8.47} | 1 | 19.9 | {23.28949550874}
-17 | {1,1.05393,0,8.14,0,0.538,5.935,29.3,4.4986,4,307,21,386.85,6.58} | 1 | 23.1 | {25.3288762085473}
-18 | {1,0.7842,0,8.14,0,0.538,5.99,81.7,4.2579,4,307,21,386.75,14.67} | 1 | 17.5 | {19.0203738118451}
-19 | {1,0.80271,0,8.14,0,0.538,5.456,36.6,3.7965,4,307,21,288.99,11.69} | 1 | 20.2 | {12.3162005347545}
-20 | {1,0.7258,0,8.14,0,0.538,5.727,69.5,3.7965,4,307,21,390.95,11.28} | 1 | 18.2 | {21.0902211848747}
+ id | x | grp_by_col | y | estimated_y
+----+-------------------------------------------------------------------------+------------+------+------------------
+ 1 | {1,0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98} | 1 | 24 | 23.973628645041
+ 2 | {1,0.02731,0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14} | 1 | 21.6 | 21.6389086856109
+ 3 | {1,0.02729,0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03} | 1 | 34.7 | 34.6766441639675
+ 4 | {1,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94} | 1 | 33.4 | 33.4521871118756
+ 5 | {1,0.06905,0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33} | 1 | 36.2 | 36.2899491706428
+ 6 | {1,0.02985,0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21} | 1 | 28.7 | 28.6994076427827
+ 7 | {1,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43} | 1 | 22.9 | 22.4882117113923
+ 8 | {1,0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15} | 1 | 27.1 | 26.5148927040405
+ 9 | {1,0.21124,12.5,7.87,0,0.524,5.631,100,6.0821,5,311,15.2,386.63,29.93} | 1 | 16.5 | 16.0669778867327
+ 10 | {1,0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1} | 1 | 18.9 | 17.4237448788601
+ 11 | {1,0.22489,12.5,7.87,0,0.524,6.377,94.3,6.3467,5,311,15.2,392.52,20.45} | 1 | 15 | 14.5944028616784
+ 12 | {1,0.11747,12.5,7.87,0,0.524,6.009,82.9,6.2267,5,311,15.2,396.9,13.27} | 1 | 18.9 | 19.6071061560237
+ 13 | {1,0.09378,12.5,7.87,0,0.524,5.889,39,5.4509,5,311,15.2,390.5,15.71} | 1 | 21.7 | 21.7585638578804
+ 14 | {1,0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26} | 1 | 20.4 | 20.2832271533629
+ 15 | {1,0.63796,0,8.14,0,0.538,6.096,84.5,4.4619,4,307,21,380.02,10.26} | 1 | 18.2 | 18.3440540662206
+ 16 | {1,0.62739,0,8.14,0,0.538,5.834,56.5,4.4986,4,307,21,395.62,8.47} | 1 | 19.9 | 20.0246074554594
+ 17 | {1,1.05393,0,8.14,0,0.538,5.935,29.3,4.4986,4,307,21,386.85,6.58} | 1 | 23.1 | 23.1458505146148
+ 18 | {1,0.7842,0,8.14,0,0.538,5.99,81.7,4.2579,4,307,21,386.75,14.67} | 1 | 17.5 | 17.4602306566804
+ 19 | {1,0.80271,0,8.14,0,0.538,5.456,36.6,3.7965,4,307,21,288.99,11.69} | 1 | 20.2 | 20.1785296856357
+ 20 | {1,0.7258,0,8.14,0,0.538,5.727,69.5,3.7965,4,307,21,390.95,11.28} | 1 | 18.2 | 18.1810300625137
+(20 rows)
</pre>
Note that the results you get for all examples may vary with the platform you are using.
@@ -561,6 +644,10 @@ File mlp.sql_in documenting the training function
*/
+CREATE TYPE MADLIB_SCHEMA.mlp_step_result AS (
+ state DOUBLE PRECISION[],
+ loss DOUBLE PRECISION
+);
CREATE TYPE MADLIB_SCHEMA.mlp_result AS (
coeff DOUBLE PRECISION[],
@@ -571,14 +658,22 @@ CREATE TYPE MADLIB_SCHEMA.mlp_result AS (
-- create SQL functions for IGD optimizer
--------------------------------------------------------------------------
CREATE FUNCTION MADLIB_SCHEMA.mlp_igd_transition(
- state DOUBLE PRECISION[],
- start_vec DOUBLE PRECISION[],
- end_vec DOUBLE PRECISION[],
- previous_state DOUBLE PRECISION[],
- layer_sizes DOUBLE PRECISION[],
- stepsize DOUBLE PRECISION,
- activation INTEGER,
- is_classification INTEGER)
+ state DOUBLE PRECISION[],
+ ind_var DOUBLE PRECISION[],
+ dep_var DOUBLE PRECISION[],
+ previous_state DOUBLE PRECISION[],
+ layer_sizes DOUBLE PRECISION[],
+ learning_rate_init DOUBLE PRECISION,
+ activation INTEGER,
+ is_classification INTEGER,
+ weight DOUBLE PRECISION,
+ warm_start BOOLEAN,
+ warm_start_coeff DOUBLE PRECISION[],
+ n_tuples INTEGER,
+ lambda DOUBLE PRECISION,
+ x_means DOUBLE PRECISION[],
+ x_stds DOUBLE PRECISION[]
+ )
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE;
@@ -592,7 +687,7 @@ LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION MADLIB_SCHEMA.mlp_igd_final(
state DOUBLE PRECISION[])
-RETURNS DOUBLE PRECISION[]
+RETURNS MADLIB_SCHEMA.mlp_step_result
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
@@ -601,16 +696,24 @@ LANGUAGE C IMMUTABLE STRICT;
* @brief Perform one iteration of backprop
*/
CREATE AGGREGATE MADLIB_SCHEMA.mlp_igd_step(
- /* start_vec*/ DOUBLE PRECISION[],
- /* end_vec */ DOUBLE PRECISION[],
- /* previous_state */ DOUBLE PRECISION[],
- /* layer_sizes */ DOUBLE PRECISION[],
- /* stepsize */ DOUBLE PRECISION,
- /* activation */ INTEGER,
- /* is_classification */ INTEGER )(
+ /* ind_var */ DOUBLE PRECISION[],
+ /* dep_var */ DOUBLE PRECISION[],
+ /* previous_state */ DOUBLE PRECISION[],
+ /* layer_sizes */ DOUBLE PRECISION[],
+ /* learning_rate_init */ DOUBLE PRECISION,
+ /* activation */ INTEGER,
+ /* is_classification */ INTEGER,
+ /* weight */ DOUBLE PRECISION,
+ /* warm_start */ BOOLEAN,
+ /* warm_start_coeff */ DOUBLE PRECISION[],
+ /* n_tuples */ INTEGER,
+ /* lambda */ DOUBLE PRECISION,
+ /* x_means */ DOUBLE PRECISION[],
+ /* x_stds */ DOUBLE PRECISION[]
+ )(
STYPE=DOUBLE PRECISION[],
SFUNC=MADLIB_SCHEMA.mlp_igd_transition,
- m4_ifdef(`GREENPLUM',`prefunc=MADLIB_SCHEMA.mlp_igd_merge,')
+ m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.mlp_igd_merge,')
FINALFUNC=MADLIB_SCHEMA.mlp_igd_final,
INITCOND='{0,0,0,0,0,0,0,0}'
);
@@ -631,13 +734,16 @@ LANGUAGE c IMMUTABLE STRICT;
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
- source_table VARCHAR,
- output_table VARCHAR,
- independent_varname VARCHAR,
- dependent_varname VARCHAR,
- hidden_layer_sizes INTEGER[],
- optimizer_params VARCHAR,
- activation VARCHAR
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR,
+ warm_start BOOLEAN,
+ verbose BOOLEAN
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`convex', `mlp_igd')
mlp_igd.mlp(
@@ -649,19 +755,96 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
hidden_layer_sizes,
optimizer_params,
activation,
- True
+ True,
+ weights,
+ warm_start,
+ verbose
)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR,
+ warm_start BOOLEAN
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_classification($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_classification($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_classification($1, $2, $3, $4, $5, $6, $7, NULL, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_classification($1, $2, $3, $4, $5, $6, NULL, NULL, NULL, FALSE);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[]
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_classification($1, $2, $3, $4, $5, NULL, NULL, NULL, FALSE, FALSE);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
- source_table VARCHAR,
- output_table VARCHAR,
- independent_varname VARCHAR,
- dependent_varname VARCHAR,
- hidden_layer_sizes INTEGER[],
- optimizer_params VARCHAR,
- activation VARCHAR
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR,
+ warm_start BOOLEAN,
+ verbose BOOLEAN
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`convex', `mlp_igd')
mlp_igd.mlp(
@@ -673,11 +856,83 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
hidden_layer_sizes,
optimizer_params,
activation,
- False
+ False,
+ weights,
+ warm_start,
+ verbose
)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR,
+ warm_start BOOLEAN
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_regression($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR,
+ weights VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_regression($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR,
+ activation VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_regression($1, $2, $3, $4, $5, $6, $7, NULL, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[],
+ optimizer_params VARCHAR
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_regression($1, $2, $3, $4, $5, $6, NULL, NULL, NULL, FALSE);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression(
+ source_table VARCHAR,
+ output_table VARCHAR,
+ independent_varname VARCHAR,
+ dependent_varname VARCHAR,
+ hidden_layer_sizes INTEGER[]
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.mlp_regression($1, $2, $3, $4, $5, NULL, NULL, NULL, FALSE, FALSE);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
+
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_predict(
model_table VARCHAR,
data_table VARCHAR,
@@ -700,9 +955,11 @@ CREATE FUNCTION MADLIB_SCHEMA.internal_predict_mlp(
coeff DOUBLE PRECISION[],
independent_varname DOUBLE PRECISION[],
is_classification DOUBLE PRECISION,
- activation_function DOUBLE PRECISION,
+ activation DOUBLE PRECISION,
layer_sizes DOUBLE PRECISION[],
- is_response INTEGER
+ is_response INTEGER,
+ x_means DOUBLE PRECISION[],
+ x_stds DOUBLE PRECISION[]
)
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'