You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/04/01 17:36:52 UTC
[madlib] branch master updated: DL: Remove eval statement
This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new 137ba49 DL: Remove eval statement
137ba49 is described below
commit 137ba49faf62db1ee83edbe6122e2ce3428b78eb
Author: Orhan Kislal <ok...@pivotal.io>
AuthorDate: Mon Apr 1 10:36:08 2019 -0700
DL: Remove eval statement
JIRA: MADLIB-1309
The eval statement used for parsing compile_params creates a security
risk. This commit replaces it by parsing the optimizer name and its
parameters.
Closes #359
Co-authored-by: Domino Valdano <dv...@pivotal.io>
---
.../modules/deep_learning/madlib_keras.py_in | 25 ++++--
.../deep_learning/madlib_keras_wrapper.py_in | 74 ++++++++++++++++--
.../modules/deep_learning/test/madlib_keras.sql_in | 91 +++++++++++-----------
.../test/unit_tests/test_madlib_keras.py_in | 4 +-
4 files changed, 135 insertions(+), 59 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 437211d..32cd921 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -43,7 +43,6 @@ from utilities.utilities import add_postfix
from utilities.utilities import is_var_valid
from utilities.utilities import madlib_version
-
def _validate_input_table(source_table, independent_varname,
dependent_varname):
_assert(is_var_valid(source_table, independent_varname),
@@ -177,6 +176,8 @@ def fit(schema_madlib, source_table, model, dependent_varname,
validation_table, dependent_varname, independent_varname,
input_shape, num_classes)
+ optimizers = get_optimizers()
+
# Compute total buffers on each segment
total_buffers_per_seg = plpy.execute(
""" SELECT gp_segment_id, count(*) AS total_buffers_per_seg
@@ -241,8 +242,10 @@ def fit(schema_madlib, source_table, model, dependent_varname,
if validation_set_provided:
_, _, _, updated_weights = KerasWeightsSerializer.deserialize_weights(model_state, model_shapes)
master_model.set_weights(updated_weights)
- compile_params_args = convert_string_of_args_to_dict(compile_params)
- master_model.compile(**compile_params_args)
+ (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
+ master_model.compile(optimizer=optimizers[opt_name](**final_args),
+ loss=compile_dict['loss'],
+ metrics=compile_dict['metrics'])
evaluate_result = master_model.evaluate(x_validation, y_validation)
if len(evaluate_result) < 2:
plpy.error('Calling evaluate on validation data returned < 2 '
@@ -396,7 +399,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
# Fit segment model on data
start_fit = time.time()
with K.tf.device(device_name):
- fit_params = convert_string_of_args_to_dict(fit_params)
+ fit_params = parse_fit_params(fit_params)
history = segment_model.fit(x_train, y_train, **fit_params)
loss = history.history['loss'][0]
accuracy = history.history['acc'][0]
@@ -502,9 +505,12 @@ def evaluate(schema_madlib, model_table, source_table, id_col,
_, updated_weights = KerasWeightsSerializer.deserialize_weights(
model_data, model_shapes)
model.set_weights(updated_weights)
- compile_params_args = convert_string_of_args_to_dict(compile_params)
+ optimizers = get_optimizers()
+ (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
with K.tf.device(device_name):
- model.compile(**compile_params_args)
+ model.compile(optimizer=optimizers[opt_name](**final_args),
+ loss=compile_dict['loss'],
+ metrics=compile_dict['metrics'])
input_shape = map(int, input_shape)
x_validation, y_validation = get_data_as_np_array(source_table,
@@ -557,7 +563,6 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
def internal_keras_evaluate(x_test, y_test, model_arch, model_data, input_shape,
compile_params):
- compile_params = convert_string_of_args_to_dict(compile_params)
device_name = '/cpu:0'
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
@@ -570,8 +575,12 @@ def internal_keras_evaluate(x_test, y_test, model_arch, model_data, input_shape,
_, model_weights = KerasWeightsSerializer.deserialize_weights(
model_data, model_shapes)
model.set_weights(model_weights)
+ optimizers = get_optimizers()
+ (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
with K.tf.device(device_name):
- model.compile(**compile_params)
+ model.compile(optimizer=optimizers[opt_name](**final_args),
+ loss=compile_dict['loss'],
+ metrics=compile_dict['metrics'])
x_test = np.array(x_test).reshape(len(x_test), input_shape[0], input_shape[1],
input_shape[2])
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 63d7e86..6f4706b 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -21,11 +21,16 @@ import numpy as np
import os
import plpy
+import ast
+
from keras import backend as K
from keras import utils as keras_utils
from keras.optimizers import *
+import keras.optimizers as opt
+
from madlib_keras_helper import KerasWeightsSerializer
+from utilities.utilities import _assert
#######################################################################
########### Keras specific functions #####
@@ -56,16 +61,21 @@ def clear_keras_session():
def compile_and_set_weights(segment_model, compile_params, device_name,
previous_state, model_shapes):
with K.tf.device(device_name):
- compile_params = convert_string_of_args_to_dict(compile_params)
- segment_model.compile(**compile_params)
+
+ optimizers = get_optimizers()
+ (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
+
+ segment_model.compile(optimizer=optimizers[opt_name](**final_args),
+ loss=compile_dict['loss'],
+ metrics=compile_dict['metrics'])
_, _, _, model_weights = KerasWeightsSerializer.deserialize_weights(
previous_state, model_shapes)
segment_model.set_weights(model_weights)
-
"""
Used to convert compile_params and fit_params to actual argument dictionaries
"""
+
def convert_string_of_args_to_dict(str_of_args):
"""Uses parenthases matching algorithm to intelligently convert
a string with valid python code into an argument dictionary"""
@@ -76,6 +86,9 @@ def convert_string_of_args_to_dict(str_of_args):
'{' : '}',
}
result_str = ""
+ key_str = ""
+ value_str = ""
+ compile_dict = {}
for char in str_of_args:
if char in dual.keys():
stack.append(char)
@@ -85,7 +98,58 @@ def convert_string_of_args_to_dict(str_of_args):
stack.pop(-1)
result_str += char
elif not stack and char == "=":
- result_str += ":"
+ key_str = result_str
+ result_str = ""
+ elif not stack and char == ",":
+ value_str = result_str
+ result_str = ""
+ compile_dict[key_str.strip()]=value_str.strip('\'')
else:
result_str += char
- return eval('{' + result_str + '}')
+ value_str = result_str
+ result_str = ""
+ compile_dict[key_str.strip()]=value_str.strip('\'')
+ return compile_dict
+
+# Parse the compile parameters and the optimizer.
+# Optimizer name and its arguments are returned in addition to the rest of the
+# compile parameters.
+def parse_compile_params(str_of_args):
+
+ compile_dict = convert_string_of_args_to_dict(str_of_args)
+ compile_dict['metrics'] = ast.literal_eval(compile_dict['metrics']) if 'metrics' in compile_dict.keys() else None
+ compile_dict['loss_weights'] = ast.literal_eval(compile_dict['loss_weights']) if 'loss_weights' in compile_dict.keys() else None
+
+ opt_name = compile_dict['optimizer'].split('(')[0]
+ optimizers = get_optimizers()
+ _assert(opt_name in optimizers,
+ "model_keras error: invalid optimizer name: {0}".format(opt_name))
+ opt_params = compile_dict['optimizer'].split('(')[1][:-1]
+ opt_params_array = opt_params.split(',')
+ opt_params_clean = map(split_and_strip, opt_params_array)
+ key_value_params = { x[0] : x[1] for x in opt_params_clean}
+ final_args = { key: bool(value) if value == 'True' or value == 'False' else float(value) for key,value in key_value_params.iteritems() }
+
+ return (opt_name,final_args,compile_dict)
+
+# Parse the fit parameters into a dictionary.
+def parse_fit_params(str_of_args):
+ compile_dict = convert_string_of_args_to_dict(str_of_args)
+ for key in compile_dict.keys():
+ compile_dict[key] = ast.literal_eval(compile_dict[key])
+ return compile_dict
+
+# Split and strip the whitespace of key=value formatted strings
+def split_and_strip(x):
+ y = x.split('=')
+ return (y[0].strip(),y[1].strip())
+
+# Return a dict mapping names to the keras optimizer classes that directly subclass Optimizer
+def get_optimizers():
+ optimizers = dict()
+ names = dir(opt)
+ for n in names:
+ optimizer = eval('opt.' + n)
+ if optimizer.__class__ == type and optimizer.__base__ == opt.Optimizer:
+ optimizers[n] = optimizer
+ return optimizers
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index f69bca5..fbf6a81 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -58,6 +58,9 @@ SELECT load_keras_model('model_arch',
}], "backend": "tensorflow"}$$);
ALTER TABLE model_arch RENAME model_id TO id;
+-- Please do not break up the compile_params string
+-- It might break the assertion
+
DROP TABLE IF EXISTS keras_out, keras_out_summary;
SELECT madlib_keras_fit('cifar_10_sample_batched',
'keras_out',
@@ -65,8 +68,8 @@ SELECT madlib_keras_fit('cifar_10_sample_batched',
'independent_var',
'model_arch',
1,
- '''optimizer''=SGD(lr=0.01, decay=1e-6, nesterov=True), ''loss''=''categorical_crossentropy'', ''metrics''=[''accuracy'']'::text,
- '''batch_size''=2, ''epochs''=1, ''verbose''=0'::text,
+ $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
+ $$ batch_size=2, epochs=1, verbose=0 $$::text,
3,
10,
FALSE,
@@ -86,8 +89,8 @@ SELECT assert(
description is NULL AND
model_size > 0 AND
madlib_version is NOT NULL AND
- compile_params = '''optimizer''=SGD(lr=0.01, decay=1e-6, nesterov=True), ''loss''=''categorical_crossentropy'', ''metrics''=[''accuracy'']' AND
- fit_params = '''batch_size''=2, ''epochs''=1, ''verbose''=0' AND
+ compile_params = $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text AND
+ fit_params = $$ batch_size=2, epochs=1, verbose=0 $$::text AND
num_iterations = 3 AND
num_classes = 10 AND
accuracy is not NULL AND
@@ -110,47 +113,47 @@ SELECT assert(model_data is not NULL , 'Keras model output validation failed') f
-- Null validation table
DROP TABLE IF EXISTS keras_out, keras_out_summary;
SELECT madlib_keras_fit('cifar_10_sample_batched',
- 'keras_out',
- 'dependent_var',
- 'independent_var',
- 'model_arch',
- 1,
- '''optimizer''=SGD(lr=0.01, decay=1e-6, nesterov=True), ''loss''=''categorical_crossentropy'', ''metrics''=[''accuracy'']'::text,
- '''batch_size''=2, ''epochs''=1, ''verbose''=0'::text,
- 1,
- 10,
- FALSE,
- NULL,
- 'model name', 'model desc');
+ 'keras_out',
+ 'dependent_var',
+ 'independent_var',
+ 'model_arch',
+ 1,
+ $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
+ $$ batch_size=2, epochs=1, verbose=0 $$::text,
+ 1,
+ 10,
+ FALSE,
+ NULL,
+ 'model name', 'model desc');
SELECT assert(
- model_arch_table = 'model_arch' AND
- model_arch_id = 1 AND
- model_type = 'madlib_keras' AND
- start_training_time < now() AND
- end_training_time > start_training_time AND
- source_table = 'cifar_10_sample_batched' AND
- validation_table = 'cifar_10_sample_batched' AND
- model = 'keras_out' AND
- dependent_varname = 'dependent_var' AND
- independent_varname = 'independent_var' AND
- name = 'model name' AND
- description = 'model desc' AND
- model_size > 0 AND
- madlib_version is NOT NULL AND
- compile_params = '''optimizer''=SGD(lr=0.01, decay=1e-6, nesterov=True), ''loss''=''categorical_crossentropy'', ''metrics''=[''accuracy'']' AND
- fit_params = '''batch_size''=2, ''epochs''=1, ''verbose''=0' AND
- num_iterations = 1 AND
- num_classes = 10 AND
- accuracy is not NULL AND
- loss is not NULL AND
- array_upper(accuracy_iter, 1) = 1 AND
- array_upper(loss_iter, 1) = 1 AND
- array_upper(time_iter, 1) = 1 AND
- accuracy_validation is NULL AND
- loss_validation is NULL AND
- array_upper(accuracy_iter_validation,1) = 0 AND
- array_upper(loss_iter_validation,1) = 0 ,
- 'Keras model output Summary Validation failed. Actual:' || __to_char(summary))
+ model_arch_table = 'model_arch' AND
+ model_arch_id = 1 AND
+ model_type = 'madlib_keras' AND
+ start_training_time < now() AND
+ end_training_time > start_training_time AND
+ source_table = 'cifar_10_sample_batched' AND
+ validation_table = 'cifar_10_sample_batched' AND
+ model = 'keras_out' AND
+ dependent_varname = 'dependent_var' AND
+ independent_varname = 'independent_var' AND
+ name = 'model name' AND
+ description = 'model desc' AND
+ model_size > 0 AND
+ madlib_version is NOT NULL AND
+ compile_params = $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text AND
+ fit_params = $$ batch_size=2, epochs=1, verbose=0 $$::text AND
+ num_iterations = 1 AND
+ num_classes = 10 AND
+ accuracy is not NULL AND
+ loss is not NULL AND
+ array_upper(accuracy_iter, 1) = 1 AND
+ array_upper(loss_iter, 1) = 1 AND
+ array_upper(time_iter, 1) = 1 AND
+ accuracy_validation is NULL AND
+ loss_validation is NULL AND
+ array_upper(accuracy_iter_validation,1) = 0 AND
+ array_upper(loss_iter_validation,1) = 0 ,
+ 'Keras model output Summary Validation failed. Actual:' || __to_char(summary))
from (select * from keras_out_summary) summary;
SELECT assert(model_data is not NULL , 'Keras model output validation failed') from (select * from keras_out) k;
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index fe6a1b8..d9613ca 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -55,8 +55,8 @@ class MadlibKerasFitTestCase(unittest.TestCase):
input_shape=(1,1,1,), padding='same'))
self.model.add(Flatten())
- self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
- self.fit_params = "'batch_size'=1, 'epochs'=1"
+ self.compile_params = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']"
+ self.fit_params = "batch_size=1, epochs=1"
self.model_weights = [3,4,5,6]
self.model_shapes = []
for a in self.model.get_weights():