You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2019/04/05 19:02:14 UTC
[madlib] branch master updated: DL: Remove num_classes param from madlib_keras_fit()
This is an automated email from the ASF dual-hosted git repository.
njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new fbde1ff DL: Remove num_classes param from madlib_keras_fit()
fbde1ff is described below
commit fbde1ffc802133187c824b4ce5671e4f52a34700
Author: Nandish Jayaram <nj...@apache.org>
AuthorDate: Wed Apr 3 11:10:17 2019 -0700
DL: Remove num_classes param from madlib_keras_fit()
The number of classes is embedded in the model architecture JSON, so it
is not necessary to explicitly pass it again as a parameter to fit. This
commit parses the model architecture JSON to automatically figure out
the num_classes value to use.
Closes #362
---
.../modules/deep_learning/madlib_keras.py_in | 4 +-
.../modules/deep_learning/madlib_keras.sql_in | 39 ++++++------
.../modules/deep_learning/test/madlib_keras.sql_in | 3 +-
.../modules/utilities/model_arch_info.py_in | 72 ++++++++--------------
4 files changed, 46 insertions(+), 72 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 1c5145c..364e9d6 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -38,11 +38,12 @@ from madlib_keras_helper import get_data_as_np_array
from madlib_keras_wrapper import *
from utilities.model_arch_info import get_input_shape
+from utilities.model_arch_info import get_num_classes
from utilities.utilities import madlib_version
def fit(schema_madlib, source_table, model, dependent_varname,
independent_varname, model_arch_table, model_arch_id, compile_params,
- fit_params, num_iterations, num_classes, use_gpu = True,
+ fit_params, num_iterations, use_gpu = True,
validation_table=None, name="", description="", **kwargs):
fit_validator = FitInputValidator(
source_table, validation_table, model, model_arch_table,
@@ -66,6 +67,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
query_result = query_result[0]
model_arch = query_result['model_arch']
input_shape = get_input_shape(model_arch)
+ num_classes = get_num_classes(model_arch)
fit_validator.validate_input_shapes(source_table, input_shape)
if validation_table:
fit_validator.validate_input_shapes(validation_table, input_shape)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index aebe270..e4a8534 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -38,7 +38,6 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
compile_params VARCHAR,
fit_params VARCHAR,
num_iterations INTEGER,
- num_classes INTEGER,
use_gpu BOOLEAN,
validation_table VARCHAR,
name VARCHAR,
@@ -61,11 +60,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
compile_params VARCHAR,
fit_params VARCHAR,
num_iterations INTEGER,
- num_classes INTEGER,
use_gpu BOOLEAN,
validation_table VARCHAR
) RETURNS VOID AS $$
- SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NULL, NULL);
+ SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
@@ -79,10 +77,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
compile_params VARCHAR,
fit_params VARCHAR,
num_iterations INTEGER,
- num_classes INTEGER,
use_gpu BOOLEAN
) RETURNS VOID AS $$
- SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL, NULL);
+ SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, NULL, NULL, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
@@ -95,10 +92,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_fit(
model_arch_id INTEGER,
compile_params VARCHAR,
fit_params VARCHAR,
- num_iterations INTEGER,
- num_classes INTEGER
+ num_iterations INTEGER
) RETURNS VOID AS $$
- SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, TRUE, NULL, NULL, NULL);
+ SELECT MADLIB_SCHEMA.madlib_keras_fit($1, $2, $3, $4, $5, $6, $7, $8, $9, TRUE, NULL, NULL, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
@@ -138,19 +134,20 @@ PythonFunctionBodyOnlyNoSchema(`deep_learning', `madlib_keras')
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step(REAL[],
- SMALLINT[],
- INTEGER,
- INTEGER,
- INTEGER[],
- INTEGER[],
- TEXT,
- INTEGER[],
- TEXT,
- TEXT,
- TEXT,
- BOOLEAN,
- BYTEA);
+DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.fit_step(
+ REAL[],
+ SMALLINT[],
+ INTEGER,
+ INTEGER,
+ INTEGER[],
+ INTEGER[],
+ TEXT,
+ INTEGER[],
+ TEXT,
+ TEXT,
+ TEXT,
+ BOOLEAN,
+ BYTEA);
CREATE AGGREGATE MADLIB_SCHEMA.fit_step(
/* ind_var */ REAL[],
/* dep_var */ SMALLINT[],
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 3e8a18c..923d0b6 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -72,9 +72,9 @@ SELECT madlib_keras_fit(
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
3,
- 2,
FALSE,
'cifar_10_sample_batched');
+
SELECT assert(
model_arch_table = 'model_arch' AND
model_arch_id = 1 AND
@@ -124,7 +124,6 @@ SELECT madlib_keras_fit(
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
1,
- 2,
FALSE,
NULL,
'model name', 'model desc');
diff --git a/src/ports/postgres/modules/utilities/model_arch_info.py_in b/src/ports/postgres/modules/utilities/model_arch_info.py_in
index 80f22cf..765aed7 100644
--- a/src/ports/postgres/modules/utilities/model_arch_info.py_in
+++ b/src/ports/postgres/modules/utilities/model_arch_info.py_in
@@ -21,69 +21,45 @@ m4_changequote(`<!', `!>')
import sys
import json
+import plpy
-def get_layers(arch):
- d = json.loads(arch)
+def _get_layers(model_arch):
+ d = json.loads(model_arch)
config = d['config']
if type(config) == list:
- return config # In keras 1.x, all models are sequential
+ return config # In keras 2.1.x, all models are sequential
elif type(config) == dict and 'layers' in config:
layers = config['layers']
if type(layers) == list:
return config['layers'] # In keras 2.x, only sequential models are supported
- plpy.error('Unable to read input_shape from keras model arch. Note: only sequential keras models are supported.')
- return None
+ plpy.error("Unable to read model architecture JSON.")
-def get_input_shape(arch):
- layers = get_layers(arch)
- return layers[0]['config']['batch_input_shape'][1:]
+def get_input_shape(model_arch):
+ arch_layers = _get_layers(model_arch)
+ if 'batch_input_shape' in arch_layers[0]['config']:
+ return arch_layers[0]['config']['batch_input_shape'][1:]
+ plpy.error('Unable to get input shape from model architecture.')
-def print_model_arch_layers(arch):
- layers = get_layers(arch)
+def get_num_classes(model_arch):
+ arch_layers = _get_layers(model_arch)
+ if 'units' in arch_layers[-1]['config']:
+ return arch_layers[-1]['config']['units']
+ plpy.error('Unable to get number of classes from model architecture.')
- print("\nModel arch layers:")
+def get_model_arch_layers_str(model_arch):
+ arch_layers = _get_layers(model_arch)
+ layers = "Model arch layers:\n"
first = True
- for layer in layers:
+ for layer in arch_layers:
if first:
first = False
else:
- print(" |")
- print(" V")
+ layers += " |\n"
+ layers += " V\n"
class_name = layer['class_name']
config = layer['config']
if class_name == 'Dense':
- print("{0}[{1}]".class_name)
+ layers += "{1}[{2}]\n".format(class_name, config['units'])
else:
- print(class_name)
-
-def print_input_shape(arch):
- layers = get_layers(arch)
- print("\nInput shape:")
- print(layers[0]['config']['batch_input_shape'][1:])
-
-def print_required_imports(arch):
- layers = get_layers(arch)
- class_names = set(layer['class_name'] for layer in layers )
- print("\nRequired imports:")
- for module in class_names:
- print("import {}".module)
-
-def main(argv):
- if len(argv) >= 2:
- file = open(argv[1],"r")
- else:
- file = sys.stdin
-
- arch = file.readline().strip()
- print_model_arch_layers(arch)
- print_input_shape(arch)
- print_required_imports(arch)
-
-def _error(msg):
- raise Exception(msg)
-
-if __name__ == "__main__":
- class plpy:
- pass
- plpy.error = _error
- main(sys.argv)
+ layers += "{1}\n".format(class_name)
+ return layers