You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2018/04/14 00:28:25 UTC

madlib git commit: MLP: Print loss of first n-1 iterations in verbose mode

Repository: madlib
Updated Branches:
  refs/heads/master 259e00416 -> ba137f35c


MLP: Print loss of first n-1 iterations in verbose mode

JIRA: MADLIB-1228

- When verbose mode was set in MLP classification and regression training,
  the loss was printed only for iterations 2 -> n-1. This commit fixes that,
  so the loss is now printed for iterations 1 -> n-1 (or up to the iteration
  in which convergence happens). The loss for the n-th iteration can be
  found in the model table.
- This commit also fixes another bug that was discovered in
  __utils_normalize_data_grouping in utils_regularization.py_in. If
  grouping_col specified more than one column, the query used for
  normalization errored out because the referenced column names were
  ambiguous. This commit now prefixes each grouping column name with the
  table name.

Closes #262


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/ba137f35
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/ba137f35
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/ba137f35

Branch: refs/heads/master
Commit: ba137f35caf3762e462925b72b3294255a0841ad
Parents: 259e004
Author: Nandish Jayaram <nj...@apache.org>
Authored: Wed Apr 11 17:22:18 2018 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Fri Apr 13 17:24:26 2018 -0700

----------------------------------------------------------------------
 src/ports/postgres/modules/convex/mlp_igd.py_in          |  5 ++---
 .../postgres/modules/convex/utils_regularization.py_in   | 11 +++++++++--
 src/ports/postgres/modules/utilities/utilities.py_in     |  5 +++--
 3 files changed, 14 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/ba137f35/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index c37cf96..5ec5e8d 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -326,12 +326,12 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
                             _state_current[{state_size}]) < {tolerance}
                         """):
                     break
-                if verbose and 1 < it.iteration <= n_iterations:
+                if verbose and it.iteration < n_iterations:
                     # Get loss value from the state.
                     res = it.get_param_value_per_group(
                         "_state_current[array_upper(_state_current, 1)] AS loss")
                     # Create a list of grouping values if grouping_cols was
-                    # used, it will be an empty list if there was not grouping.
+                    # used, it will be an empty list if there was no grouping.
                     groups = [t[col_grp_key] for t in res if t[col_grp_key]]
                     losses = [t['loss'] for t in res]
                     loss = zip(groups, losses) if groups else losses
@@ -364,7 +364,6 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
     plpy.execute("DROP TABLE IF EXISTS {0}".format(temp_output_table))
     return None
 
-
 def normalize_data(args):
     """
         Create a new temp table (tbl_data_scaled) with the standardized version

http://git-wip-us.apache.org/repos/asf/madlib/blob/ba137f35/src/ports/postgres/modules/convex/utils_regularization.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/utils_regularization.py_in b/src/ports/postgres/modules/convex/utils_regularization.py_in
index 9fc3e8f..74ee3d4 100644
--- a/src/ports/postgres/modules/convex/utils_regularization.py_in
+++ b/src/ports/postgres/modules/convex/utils_regularization.py_in
@@ -7,6 +7,7 @@ from validation.cv_utils import __cv_split_data_using_id_tbl_compute
 from validation.cv_utils import __cv_generate_random_id
 from utilities.utilities import __mad_version
 from utilities.utilities import _check_groups
+from utilities.utilities import get_table_qualified_col_str
 from utilities.utilities import split_quoted_delimited_str
 
 version_wrapper = __mad_version()
@@ -217,6 +218,10 @@ def __utils_normalize_data_grouping(y_decenter=True, **kwargs):
     """
     group_col = kwargs.get('grouping_col')
     group_col_list = split_quoted_delimited_str(group_col)
+    # If more than one column was specified for grouping, prefix each column
+    # name with '{tbl_data}.' to avoid ambiguity.
+    select_grouping_cols = get_table_qualified_col_str(kwargs.get('tbl_data'),
+                                                       group_col_list)
     x_mean_join_clause = ''
     y_mean_join_clause = ''
     if kwargs.get('x_mean_table'):
@@ -238,13 +243,15 @@ def __utils_normalize_data_grouping(y_decenter=True, **kwargs):
                                             __x__.std::double precision[]))
                     AS {col_ind_var_norm_new},
                 ({col_dep_var} {ydecenter_str})  AS {col_dep_var_norm_new},
-                {tbl_data}.{group_col}
+                {select_grouping_cols}
             FROM {tbl_data}
             {x_mean_join_clause}
             {y_mean_join_clause}
         """.format(ydecenter_str=ydecenter_str, group_col=group_col,
                     x_mean_join_clause=x_mean_join_clause,
-                    y_mean_join_clause=y_mean_join_clause, **kwargs))
+                    y_mean_join_clause=y_mean_join_clause,
+                    select_grouping_cols=select_grouping_cols,
+                    **kwargs))
     return None
 # ========================================================================
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/ba137f35/src/ports/postgres/modules/utilities/utilities.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 4131a80..2ef40d6 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -803,8 +803,9 @@ def get_table_qualified_col_str(tbl_name, col_list):
 def get_grouping_col_str(schema_madlib, module_name, reserved_cols,
                          source_table, grouping_col):
     if grouping_col and grouping_col.lower() != 'null':
+        grouping_col_array = _string_to_array_with_quotes(grouping_col)
         cols_in_tbl_valid(source_table,
-                          _string_to_array_with_quotes(grouping_col),
+                          grouping_col_array,
                           module_name)
         intersect = frozenset(
             _string_to_array(grouping_col)).intersection(frozenset(reserved_cols))
@@ -816,7 +817,7 @@ def get_grouping_col_str(schema_madlib, module_name, reserved_cols,
         grouping_list = [i + "::text"
                          for i in explicit_bool_to_text(
                              source_table,
-                             _string_to_array_with_quotes(grouping_col),
+                             grouping_col_array,
                              schema_madlib)]
         grouping_str = ','.join(grouping_list)
     else: