You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2018/04/10 20:45:03 UTC
[3/3] madlib git commit: MLP: Fix bug in array dep var for regression
MLP: Fix bug in array dep var for regression
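MLP training for regression fails if the dependent variable is of an
array type. This is because the variable dependent_varname was updated
to reflect the new column created in the standardized table only for
classification with an array dependent variable, and never for
regression. This commit fixes that issue by performing the update
before the classification check, so that it applies to regression too.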
Co-authored-by: Nikhil Kak <nk...@pivotal.io>
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/47eefc1c
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/47eefc1c
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/47eefc1c
Branch: refs/heads/master
Commit: 47eefc1c91a33db0a788c2c145b8016b892de5ad
Parents: 5a71ff6
Author: Nandish Jayaram <nj...@apache.org>
Authored: Wed Apr 4 14:42:38 2018 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Tue Apr 10 11:14:21 2018 -0700
----------------------------------------------------------------------
src/ports/postgres/modules/convex/mlp_igd.py_in | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/47eefc1c/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 8010579..2799355 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -155,21 +155,21 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
normalize_data(locals())
dependent_vartype = get_expr_type(dependent_varname, source_table)
+ # We are now using tbl_data_scaled, so change the dependent
+ # varname accordingly.
+ dependent_varname = col_dep_var_norm_new
if is_classification:
# If dependent variable is an array during classification, assume
# that it is already one-hot-encoded.
if "[]" in dependent_vartype:
- # We are now using tbl_data_scaled, so change the dependent
- # varname accordingly.
- dependent_varname = col_dep_var_norm_new
num_output_nodes = get_col_dimension(tbl_data_scaled,
dependent_varname)
else:
labels = plpy.execute("SELECT DISTINCT {0} FROM {1}".
- format(dependent_varname, source_table))
+ format(dependent_varname_backup, source_table))
num_output_nodes = len(labels)
for label_obj in labels:
- label = _format_label(label_obj[dependent_varname])
+ label = _format_label(label_obj[dependent_varname_backup])
classes.append(label)
classes.sort()
level_vals_str = ','.join(["{0}={1}".format(