You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2019/04/05 19:02:14 UTC

[madlib] branch master updated: DL: Remove num_classes param from madlib_keras_fit()

This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new fbde1ff  DL: Remove num_classes param from madlib_keras_fit()
fbde1ff is described below

commit fbde1ffc802133187c824b4ce5671e4f52a34700
Author: Nandish Jayaram <nj...@apache.org>
AuthorDate: Wed Apr 3 11:10:17 2019 -0700

    DL: Remove num_classes param from madlib_keras_fit()
    
    The number of classes is embedded in the model architecture JSON, so it
    is not necessary to explicitly pass it again as a parameter to fit. This
    commit parses the model architecture JSON to automatically figure out
    the num_classes value to use.
    
    Closes #362
---
 .../modules/deep_learning/madlib_keras.py_in       |  4 +-
 .../modules/deep_learning/madlib_keras.sql_in      | 39 ++++++------
 .../modules/deep_learning/test/madlib_keras.sql_in |  3 +-
 .../modules/utilities/model_arch_info.py_in        | 72 ++++++++--------------
 4 files changed, 46 insertions(+), 72 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 1c5145c..364e9d6 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -38,11 +38,12 @@ from madlib_keras_helper import get_data_as_np_array
 from madlib_keras_wrapper import *
 
 from utilities.model_arch_info import get_input_shape
+from utilities.model_arch_info import get_num_classes
 from utilities.utilities import madlib_version
 
 def fit(schema_madlib, source_table, model, dependent_varname,
         independent_varname, model_arch_table, model_arch_id, compile_params,
-        fit_params, num_iterations, num_classes, use_gpu = True,
+        fit_params, num_iterations, use_gpu = True,
         validation_table=None, name="", description="", **kwargs):
     fit_validator = FitInputValidator(
         source_table, validation_table, model, model_arch_table,
@@ -66,6 +67,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
     query_result = query_result[0]
     model_arch = query_result['model_arch']
     input_shape = get_input_shape(model_arch)
+    num_classes = get_num_classes(model_arch)
     fit_validator.validate_input_shapes(source_table, input_shape)
     if validation_table:
         fit_validator.validate_input_shapes(validation_table, input_shape)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index aebe270..e4a8534 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -38,7 +38,6 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
     compile_params          VARCHAR,
     fit_params              VARCHAR,
     num_iterations          INTEGER,
-    num_classes             INTEGER,
     use_gpu                 BOOLEAN,
     validation_table        VARCHAR,
     name                    VARCHAR,
@@ -61,11 +60,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
     compile_params          VARCHAR,
     fit_params              VARCHAR,
     num_iterations          INTEGER,
-    num_classes             INTEGER,
     use_gpu                 BOOLEAN,
     validation_table        VARCHAR
 ) RETURNS VOID AS $$
-    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NULL, NULL);
+    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
 
@@ -79,10 +77,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
     compile_params          VARCHAR,
     fit_params              VARCHAR,
     num_iterations          INTEGER,
-    num_classes             INTEGER,
     use_gpu                 BOOLEAN
 ) RETURNS VOID AS $$
-    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL, NULL);
+    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, NULL, NULL, NULL);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
 
@@ -95,10 +92,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
     model_arch_id           INTEGER,
     compile_params          VARCHAR,
     fit_params              VARCHAR,
-    num_iterations          INTEGER,
-    num_classes             INTEGER
+    num_iterations          INTEGER
 ) RETURNS VOID AS $$
-    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, TRUE, NULL, NULL, NULL);
+    SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, TRUE, NULL, NULL, NULL);
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
 
@@ -138,19 +134,20 @@ PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
 
-DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step(REAL[],
-                                                   SMALLINT[],
-                                                   INTEGER,
-                                                   INTEGER,
-                                                   INTEGER[],
-                                                   INTEGER[],
-                                                   TEXT,
-                                                   INTEGER[],
-                                                   TEXT,
-                                                   TEXT,
-                                                   TEXT,
-                                                   BOOLEAN,
-                                                   BYTEA);
+DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step(
+  REAL[],
+  SMALLINT[],
+  INTEGER,
+  INTEGER,
+  INTEGER[],
+  INTEGER[],
+  TEXT,
+  INTEGER[],
+  TEXT,
+  TEXT,
+  TEXT,
+  BOOLEAN,
+  BYTEA);
 CREATE AGGREGATE MADLIB_SCHEMA.fit_step(
     /* ind_var */                REAL[],
     /* dep_var */                SMALLINT[],
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 3e8a18c..923d0b6 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -72,9 +72,9 @@ SELECT madlib_keras_fit(
     $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
     $$ batch_size=2, epochs=1, verbose=0 $$::text,
     3,
-    2,
     FALSE,
     'cifar_10_sample_batched');
+
 SELECT assert(
         model_arch_table = 'model_arch' AND
         model_arch_id = 1 AND
@@ -124,7 +124,6 @@ SELECT madlib_keras_fit(
     $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
     $$ batch_size=2, epochs=1, verbose=0 $$::text,
     1,
-    2,
     FALSE,
     NULL,
     'model name', 'model desc');
diff --git a/src/ports/postgres/modules/utilities/model_arch_info.py_in b/src/ports/postgres/modules/utilities/model_arch_info.py_in
index 80f22cf..765aed7 100644
--- a/src/ports/postgres/modules/utilities/model_arch_info.py_in
+++ b/src/ports/postgres/modules/utilities/model_arch_info.py_in
@@ -21,69 +21,45 @@ m4_changequote(`<!', `!>')
 
 import sys
 import json
+import plpy
 
-def get_layers(arch):
-    d = json.loads(arch)
+def _get_layers(model_arch):
+    d = json.loads(model_arch)
     config = d['config']
     if type(config) == list:
-        return config  # In keras 1.x, all models are sequential
+        return config  # In keras 2.1.x, a sequential model's config is the layer list itself
     elif type(config) == dict and 'layers' in config:
         layers = config['layers']
         if type(layers) == list:
             return config['layers']  # In keras 2.x, only sequential models are supported
-    plpy.error('Unable to read input_shape from keras model arch.  Note: only sequential keras models are supported.')
-    return None
+    plpy.error("Unable to read model architecture JSON.")
 
-def get_input_shape(arch):
-    layers = get_layers(arch)
-    return layers[0]['config']['batch_input_shape'][1:]
+def get_input_shape(model_arch):
+    arch_layers = _get_layers(model_arch)
+    if 'batch_input_shape' in arch_layers[0]['config']:
+        return arch_layers[0]['config']['batch_input_shape'][1:]
+    plpy.error('Unable to get input shape from model architecture.')
 
-def print_model_arch_layers(arch):
-    layers = get_layers(arch)
+def get_num_classes(model_arch):
+    arch_layers = _get_layers(model_arch)
+    if 'units' in arch_layers[-1]['config']:
+        return arch_layers[-1]['config']['units']
+    plpy.error('Unable to get number of classes from model architecture.')
 
-    print("\nModel arch layers:")
+def get_model_arch_layers_str(model_arch):
+    arch_layers = _get_layers(model_arch)
+    layers = "Model arch layers:\n"
     first = True
-    for layer in layers:
+    for layer in arch_layers:
         if first:
             first = False
         else:
-            print("   |")
-            print("   V")
+            layers += "   |\n"
+            layers += "   V\n"
         class_name = layer['class_name']
         config = layer['config']
         if class_name == 'Dense':
-            print("{0}[{1}]".class_name)
+            layers += "{0}[{1}]\n".format(class_name, config['units'])
         else:
-            print(class_name)
-
-def print_input_shape(arch):
-    layers = get_layers(arch)
-    print("\nInput shape:")
-    print(layers[0]['config']['batch_input_shape'][1:])
-
-def print_required_imports(arch):
-    layers = get_layers(arch)
-    class_names = set(layer['class_name'] for layer in layers )
-    print("\nRequired imports:")
-    for module in class_names:
-        print("import {}".module)
-
-def main(argv):
-    if len(argv) >= 2:
-        file = open(argv[1],"r")
-    else:
-        file = sys.stdin
-
-    arch = file.readline().strip()
-    print_model_arch_layers(arch)
-    print_input_shape(arch)
-    print_required_imports(arch)
-
-def _error(msg):
-    raise Exception(msg)
-
-if __name__ == "__main__":
-    class plpy:
-        pass
-    plpy.error = _error
-    main(sys.argv)
+            layers += "{0}\n".format(class_name)
+    return layers