You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2020/09/10 00:31:27 UTC

[madlib] branch master updated: DL: Check for nan in loss or metrics

This is an automated email from the ASF dual-hosted git repository.

domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new 67b8961  DL: Check for nan in loss or metrics
67b8961 is described below

commit 67b8961d1b02536e4a7e06b54111ea7f79e118e0
Author: Domino Valdno <dv...@vmware.com>
AuthorDate: Thu Aug 27 16:49:53 2020 -0400

    DL: Check for nan in loss or metrics
    
    JIRA: MADLIB-1443
    
    Handles NaN in madlib_keras_fit() or madlib_keras_fit_multiple()
        by casting each float in an array to a derived class sqlfloat,
        which overrides the default representation of itself to show
        up as the corresponding SQL string.
    
      - Keeps metric history for each model up until the NaN failure
        instead of just discarding.
    
      - Translates all None's into NULL's and python nan's
        into SQL NaN's in any array of floats in one step,
        so that this can be used in all of the places we
        need it.
    
    pgsanity:
    
      - Some of the unit tests included make use of the
        python module pgsanity to verify the representation of
        an array of floats translates to a valid string that
        the SQL parser will understand as a numeric array.
        If it isn't installed, or isn't functioning properly, this
        part of the test will be skipped.
    
      - pgsanity calls ecpg under the hood, which is the same library
        used by postgres & greenplum to parse the SQL commands.  I
        hope we can use it for more unit tests in the future.
---
 .../modules/deep_learning/madlib_keras.py_in       | 21 +++-------
 .../madlib_keras_fit_multiple_model.py_in          |  9 ++---
 .../deep_learning/madlib_keras_helper.py_in        | 47 +++++++++++++++++++++-
 .../test/unit_tests/test_madlib_keras.py_in        | 41 +++++++++++++++++++
 4 files changed, 96 insertions(+), 22 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 8bb1531..e8eac71 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -251,24 +251,26 @@ def fit(schema_madlib, source_table, model, model_arch_table,
     dep_vartype = src_summary_dict['dep_vartype']
     dependent_varname = src_summary_dict['dependent_varname_in_source_table']
     independent_varname = src_summary_dict['independent_varname_in_source_table']
+
     # Define some constants to be inserted into the summary table.
     model_type = "madlib_keras"
     metrics_list = get_metrics_from_compile_param(compile_params)
     is_metrics_specified = True if metrics_list else False
     metrics_type = 'ARRAY{0}'.format(metrics_list) if is_metrics_specified else 'NULL'
     metrics_iters = metrics_iters if metrics_iters else 'NULL'
+
     # We always compute the training loss and metrics, at least once.
-    training_loss_final = training_loss[-1]
-    training_loss = 'ARRAY{0}'.format(training_loss) if training_loss else 'NULL'
     training_metrics_final, training_metrics = get_metrics_sql_string(
         training_metrics, is_metrics_specified)
+    training_loss_final, training_loss = get_metrics_sql_string(
+        training_loss, True)
+
     # Validation loss and metrics are computed only if validation_table
     # is provided.
     if validation_set_provided:
         validation_metrics_final, validation_metrics = get_metrics_sql_string(
             validation_metrics, is_metrics_specified)
-        validation_loss_final = validation_loss[-1]
-        validation_loss = 'ARRAY{0}'.format(validation_loss)
+        validation_loss_final, validation_loss = get_metrics_sql_string(validation_loss)
         # Must quote the string before inserting to table. Explicitly
         # quoting it here since this can also take a NULL value, done
         # in the else part.
@@ -400,17 +402,6 @@ def get_source_summary_table_dict(fit_validator):
         NORMALIZING_CONST_COLNAME, fit_validator.source_summary_table)
     return source_summary
 
-def get_metrics_sql_string(metrics_list, is_metrics_specified):
-    """
-        Return the SQL string to use for creating metrics SQL values.
-    """
-    if is_metrics_specified:
-        metrics_final = metrics_list[-1]
-        metrics_all = 'ARRAY{0}'.format(metrics_list)
-    else:
-        metrics_final = metrics_all = 'NULL'
-    return metrics_final, metrics_all
-
 def compute_loss_and_metrics(schema_madlib, table, compile_params, model_arch,
                              serialized_weights, use_gpus, accessible_gpus_for_seg,
                              dist_key_mapping, images_per_seg_val, metrics_list, loss_list,
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 0a9b9ae..b847550 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -561,14 +561,11 @@ class FitMultipleModel():
             mst_loss = self.valid_mst_loss
 
         if mst_key in mst_metric:
-            metrics = mst_metric[mst_key]
-            metrics_final = metrics[-1]
+            metrics_final, metrics = get_metrics_sql_string(mst_metric[mst_key])
             metrics_elapsed_time = mst_metric_eval_time[mst_key]
-            metrics = "ARRAY{}".format(metrics)
             metrics_elapsed_time = "ARRAY{}".format(metrics_elapsed_time)
-        loss = mst_loss[mst_key]
-        loss_final = loss[-1]
-        loss = "ARRAY{}".format(loss)
+        loss_final, loss = get_metrics_sql_string(mst_loss[mst_key])
+
         if is_train:
             update_query = """
                            UPDATE {self.model_info_table} SET
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index 49e3a12..ca54a4d 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -25,7 +25,7 @@ from utilities.utilities import is_platform_pg
 from utilities.validate_args import table_exists
 from madlib_keras_gpu_info import GPUInfoFunctions
 import plpy
-
+from math import isnan
 
 ############### Constants used in other deep learning files #########
 # Name of columns in model summary table.
@@ -308,3 +308,48 @@ def get_accessible_gpus_for_seg(schema_madlib, segments_per_host, module_name):
                     'recommended configuration is to have 1 GPU available per segment.')
                 warning_flag = False
         return accessible_gpus_for_seg
+
+class sqlnull:
+    def __repr__(self):
+        return 'NULL'
+
+class sqlfloat(float):
+    """
+        Same as a python float, but with a SQL-friendly
+        string representation for printing or formatting
+    """
+    def __repr__(self):
+        if isnan(self):
+            return "'NaN'::DOUBLE PRECISION"
+        else:
+            return float.__repr__(self)
+    def __str__(self):
+        return self.__repr__()
+
+def py_to_sql(x):
+    """
+        Converts a float, list of floats, or multi-dimensional
+        nested list of floats into corresponding lists of sqlfloat's
+    """
+
+    if type(x) == float:
+        return sqlfloat(x)
+    elif type(x) == list:
+        return map(py_to_sql, x)
+    elif x is None:
+        return sqlnull()
+    else:
+        return x
+
+def get_metrics_sql_string(metrics_list, is_metrics_specified=True):
+    """
+        Return the SQL string to use for creating metrics SQL values.
+    """
+    if is_metrics_specified:
+        metrics_list = py_to_sql(metrics_list)
+        metrics_final = '({0})'.format(metrics_list[-1])
+        metrics_all = '(ARRAY{0})'.format(metrics_list)
+    else:
+        metrics_final = metrics_all = 'NULL'
+    return metrics_final, metrics_all
+
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 774c943..6dacdcd 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -22,6 +22,7 @@ m4_changequote(`<!', `!>')
 import sys
 import numpy as np
 import os
+import re
 from os import path
 # Add convex module to the pythonpath.
 sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
@@ -33,6 +34,14 @@ from keras.layers import *
 import unittest
 from mock import *
 import plpy_mock as plpy
+try:
+    from pgsanity.pgsanity import check_string as pglint
+    pglint("SELECT 1;")
+except:
+    # Just skip this test if "pip install pgsanity" hasn't been run
+    #  or isn't functioning (eg. ecpg isn't in path)
+    def pglint(q, s=False):
+        return (True, '')
 
 # helper for multiplying array by int
 def mult(k,arr):
@@ -1337,6 +1346,38 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
             self.subject.get_accessible_gpus_for_seg('schema_madlib', 2, 'foo')
         self.assertIn('no gpus configured on hosts', str(error.exception).lower())
 
+    def test_get_metrics_sql_string(self):
+        NaN = float('nan')
+        test_metrics = [ 3.2, NaN, 0.0, None, 8.94, -6.8, 1.2, NaN, NaN ]
+
+        py2sql = { x : str(x) for x in test_metrics if type(x) == float }
+        py2sql[NaN] = "'NaN'::DOUBLE PRECISION"
+        py2sql[None] = 'NULL'
+        correct = [ py2sql[x] for x in test_metrics ]
+
+        final, metrics = self.subject.get_metrics_sql_string(test_metrics)
+
+        m = re.match("\(ARRAY\[(.*)\]\)", metrics)
+        answers = m.group(1).split(', ')
+        self.assertListEqual(answers, correct)
+        self.assertEqual(final, '(' + correct[-1] + ')')
+
+        # Check that postgresql parser sees this as a valid sql data type
+        res = pglint('SELECT ' + final, True)
+        self.assertEqual(
+            res,
+            (True, ''),
+            "This is not valid PostgresSQL: {}".format('SELECT' + final)
+        )
+
+        # Check that postgresql parser sees this as a valid array
+        res = pglint('SELECT {}[1]'.format(metrics), True)
+        self.assertEqual(
+            res,
+            (True, ''),
+            "This is not valid PostgresSQL: SELECT {}[1]".format(metrics)
+        )
+
 class MadlibKerasEvaluationTestCase(unittest.TestCase):
     def setUp(self):
         self.plpy_mock = Mock(spec='error')