You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nk...@apache.org on 2019/04/15 22:17:08 UTC
[madlib] branch master updated (50d8b59 -> c4f8349)
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git.
from 50d8b59 Madpack: Fix IC/DC check to include wcc
new 241074a DL: Remove reshaping from training and validation data
new 8570aa0 DL: Use FORMAT class for getting model_arch col names
new ebec582 DL: Rename get_device_name function
new 63847d7 DL: Remove evaluate function
new c4f8349 DL: Rename helper to validator and move all unit tests to madlib_keras.py_in
The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../deep_learning/keras_model_arch_table.py_in | 19 +-
.../modules/deep_learning/madlib_keras.py_in | 223 +++++++--------
.../modules/deep_learning/madlib_keras.sql_in | 40 +--
.../deep_learning/madlib_keras_helper.py_in | 304 ---------------------
.../deep_learning/madlib_keras_predict.py_in | 24 +-
.../deep_learning/madlib_keras_serializer.py_in | 153 +++++++++++
.../deep_learning/madlib_keras_validator.py_in | 129 +++++++++
.../deep_learning/madlib_keras_wrapper.py_in | 19 +-
.../modules/deep_learning/test/madlib_keras.sql_in | 76 ++++--
.../test/unit_tests/test_madlib_keras.py_in | 187 ++++++++++++-
.../test/unit_tests/test_madlib_keras_helper.py_in | 217 ---------------
.../modules/utilities/minibatch_validation.py_in | 10 +-
12 files changed, 646 insertions(+), 755 deletions(-)
delete mode 100644 src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
create mode 100644 src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in
create mode 100644 src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
delete mode 100644 src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_helper.py_in
[madlib] 03/05: DL: Rename get_device_name function
Posted by nk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit ebec58271efc87d697c35639f9bbcb7aa47cd399
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Apr 10 16:20:49 2019 -0700
DL: Rename get_device_name function
JIRA: MADLIB-1304
Renamed it because, besides returning the device name, it also sets the CUDA_VISIBLE_DEVICES environment variable
Closes #367
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
src/ports/postgres/modules/deep_learning/madlib_keras.py_in | 5 ++---
src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in | 2 +-
.../modules/deep_learning/test/unit_tests/test_madlib_keras.py_in | 6 +++---
.../test/unit_tests/test_madlib_keras_serializer.py_in | 4 ++--
4 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index b883cac..83ca5a4 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -336,8 +336,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
SD = kwargs['SD']
# Configure GPUs/CPUs
- device_name = get_device_name_for_keras(
- use_gpu, current_seg_id)
+ device_name = get_device_name_and_set_cuda_env(use_gpu, current_seg_id)
# Set up system if this is the first buffer on segment'
@@ -525,7 +524,7 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
def internal_keras_evaluate(dependent_var, independent_var, model_architecture,
model_data, compile_params, use_gpu, seg, **kwargs):
- device_name = get_device_name_for_keras(use_gpu, seg)
+ device_name = get_device_name_and_set_cuda_env(use_gpu, seg)
model = model_from_json(model_architecture)
model_shapes = madlib_keras_serializer.get_model_shapes(model)
_, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index a9ebcef..6ebf96e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -35,7 +35,7 @@ from utilities.utilities import _assert
#######################################################################
########### Keras specific functions #####
#######################################################################
-def get_device_name_for_keras(use_gpu, seg):
+def get_device_name_and_set_cuda_env(use_gpu, seg):
gpus_per_host = 4
if use_gpu:
device_name = '/gpu:0'
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 059bd11..8ca4958 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -151,12 +151,12 @@ class MadlibKerasFitTestCase(unittest.TestCase):
self.assertEqual(0, self.subject.clear_keras_session.call_count)
self.assertEqual(2, k['SD']['buffer_count'])
- def test_get_device_name_for_keras(self):
+ def test_get_device_name_and_set_cuda_env(self):
import os
- self.assertEqual('/gpu:0', self.subject.get_device_name_for_keras(
+ self.assertEqual('/gpu:0', self.subject.get_device_name_and_set_cuda_env(
True, 1))
self.assertEqual('1', os.environ["CUDA_VISIBLE_DEVICES"])
- self.assertEqual('/cpu:0', self.subject.get_device_name_for_keras(
+ self.assertEqual('/cpu:0', self.subject.get_device_name_and_set_cuda_env(
False, 1))
self.assertEqual('-1', os.environ["CUDA_VISIBLE_DEVICES"])
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
index 6844327..8264800 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
@@ -33,7 +33,7 @@ import plpy_mock as plpy
m4_changequote(`<!', `!>')
-class MadlibKerasHelperTestCase(unittest.TestCase):
+class MadlibSerializerTestCase(unittest.TestCase):
def setUp(self):
self.plpy_mock = Mock(spec='error')
patches = {
@@ -139,7 +139,7 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
res)
-class MadlibSerializerTestCase(unittest.TestCase):
+class MadlibKerasHelperTestCase(unittest.TestCase):
def setUp(self):
self.plpy_mock = Mock(spec='error')
patches = {
[madlib] 01/05: DL: Remove reshaping from training and validation
data
Posted by nk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 241074ae68cb8e15437f98abf1c2e3c7bb3471ae
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Thu Mar 28 14:36:16 2019 -0700
DL: Remove reshaping from training and validation data
JIRA: MADLIB-1304
We now assume that the user's input data already has the shape expected
by the model architecture. We no longer reshape either the training or the
validation data when preparing it for keras.fit/keras.eval.
We now run keras.eval on the segments instead of on the master, to make
use of the MPP architecture. We now assume that the validation data is
not minibatched, i.e. there is one image per row. We run keras.eval on
each row of the validation data and then aggregate the results using
madlib.array_avg.
Additionally
1. Add `import keras` although it's not directly used. For example, if the
user passes in the optimizer as keras.optimizers.SGD instead of just SGD,
then without this import our keras python files won't find the SGD
module.
2. Moved the hardcoded value of gpus_per_host to the
`get_device_name_for_keras` function. This will eventually be refactored
in a future PR.
3. Add numeric array check to dependent var for validation table.
Closes #367
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
Co-authored-by: Orhan Kislal <ok...@apache.org>
---
.../modules/deep_learning/madlib_keras.py_in | 154 ++++++++------
.../modules/deep_learning/madlib_keras.sql_in | 13 +-
.../deep_learning/madlib_keras_helper.py_in | 235 +++------------------
.../deep_learning/madlib_keras_predict.py_in | 12 +-
.../deep_learning/madlib_keras_serializer.py_in | 153 ++++++++++++++
.../deep_learning/madlib_keras_wrapper.py_in | 19 +-
.../modules/deep_learning/test/madlib_keras.sql_in | 75 ++++---
.../test/unit_tests/test_madlib_keras.py_in | 12 +-
...er.py_in => test_madlib_keras_serializer.py_in} | 142 ++++++-------
.../modules/utilities/minibatch_validation.py_in | 10 +-
10 files changed, 415 insertions(+), 410 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 55892d2..c3f36ab 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -24,18 +24,22 @@ import plpy
import sys
import time
+# Do not remove `import keras` although it's not directly used in this file.
+# For ex if the user passes in the optimizer as keras.optimizers.SGD instead of just
+# SGD, then without this import this python file won't find the SGD module
+import keras
+
from keras import backend as K
from keras import utils as keras_utils
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.regularizers import *
-
+import madlib_keras_serializer
from madlib_keras_helper import CLASS_VALUES_COLNAME
from madlib_keras_helper import DEPENDENT_VARTYPE
from madlib_keras_helper import NORMALIZING_CONST_COLNAME
from madlib_keras_helper import FitInputValidator
-from madlib_keras_helper import get_data_as_np_array
from madlib_keras_wrapper import *
from utilities.model_arch_info import get_input_shape
@@ -59,6 +63,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
use_gpu = bool(use_gpu)
# Get the serialized master model
+ #TODO fix hardcoding of col names
start_deserialization = time.time()
model_arch_query = "SELECT model_arch, model_weights FROM {0} WHERE "\
"id = {1}".format(model_arch_table, model_arch_id)
@@ -70,9 +75,9 @@ def fit(schema_madlib, source_table, model, dependent_varname,
model_arch = query_result['model_arch']
input_shape = get_input_shape(model_arch)
num_classes = get_num_classes(model_arch)
- fit_validator.validate_input_shapes(source_table, input_shape)
+ fit_validator.validate_input_shapes(source_table, input_shape, 2)
if validation_table:
- fit_validator.validate_input_shapes(validation_table, input_shape)
+ fit_validator.validate_input_shapes(validation_table, input_shape, 1)
model_weights_serialized = query_result['model_weights']
# Convert model from json and initialize weights
@@ -86,18 +91,13 @@ def fit(schema_madlib, source_table, model, dependent_varname,
if model_weights_serialized:
# If warm start from previously trained model, set weights
- model_weights = KerasWeightsSerializer.deserialize_weights_orig(
+ model_weights = madlib_keras_serializer.deserialize_weights_orig(
model_weights_serialized, model_shapes)
master_model.set_weights(model_weights)
# Construct validation dataset if provided
validation_set_provided = bool(validation_table)
validation_aggregate_accuracy = []; validation_aggregate_loss = []
- x_validation = None; y_validation = None
- if validation_set_provided:
- x_validation, y_validation = get_data_as_np_array(
- validation_table, dependent_varname, independent_varname,
- input_shape, num_classes)
optimizers = get_optimizers()
@@ -136,7 +136,7 @@ def fit(schema_madlib, source_table, model, dependent_varname,
use_gpu, source_table), ["bytea"])
# Define the state for the model and loss/accuracy storage lists
- model_state = KerasWeightsSerializer.serialize_weights(
+ model_state = madlib_keras_serializer.serialize_weights(
0, 0, 0, model_weights)
aggregate_loss, aggregate_accuracy, aggregate_runtime = [], [], []
@@ -157,21 +157,22 @@ def fit(schema_madlib, source_table, model, dependent_varname,
plpy.info("Time for iteration {0}: {1} sec".
format(i + 1, end_iteration - start_iteration))
aggregate_runtime.append(datetime.datetime.now())
- avg_loss, avg_accuracy, model_state = \
- KerasWeightsSerializer.deserialize_iteration_state(iteration_result)
+ avg_loss, avg_accuracy, model_state = madlib_keras_serializer.deserialize_iteration_state(iteration_result)
plpy.info("Average loss after training iteration {0}: {1}".format(
i + 1, avg_loss))
plpy.info("Average accuracy after training iteration {0}: {1}".format(
i + 1, avg_accuracy))
if validation_set_provided:
- _, _, _, updated_weights = \
- KerasWeightsSerializer.deserialize_weights(model_state, model_shapes)
+ _, _, _, updated_weights = madlib_keras_serializer.deserialize_weights(
+ model_state, model_shapes)
master_model.set_weights(updated_weights)
- (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
- master_model.compile(optimizer=optimizers[opt_name](**final_args),
- loss=compile_dict['loss'],
- metrics=compile_dict['metrics'])
- evaluate_result = master_model.evaluate(x_validation, y_validation)
+ evaluate_result = get_loss_acc_from_keras_eval(schema_madlib,
+ validation_table,
+ dependent_varname,
+ independent_varname,
+ compile_params_to_pass,
+ model_arch, model_state,
+ use_gpu)
if len(evaluate_result) < 2:
plpy.error('Calling evaluate on validation data returned < 2 '
'metrics. Expected metrics are loss and accuracy')
@@ -184,7 +185,6 @@ def fit(schema_madlib, source_table, model, dependent_varname,
aggregate_loss.append(avg_loss)
aggregate_accuracy.append(avg_accuracy)
-
end_training_time = datetime.datetime.now()
final_validation_acc = None
@@ -276,6 +276,35 @@ def fit(schema_madlib, source_table, model, dependent_varname,
plpy.execute(create_output_table, [model_state])
+def get_loss_acc_from_keras_eval(schema_madlib, table, dependent_varname,
+ independent_varname, compile_params, model_arch,
+ model_data, use_gpu):
+ """
+ This function will call the internal keras evaluate function to get the loss
+ and accuracy of each tuple which then gets averaged to get the final result.
+ :param schema_madlib:
+ :param table:
+ :param dependent_varname:
+ :param independent_varname:
+ :param compile_params:
+ :param model_arch:
+ :param model_data:
+ :return:
+ """
+ evaluate_query = plpy.prepare("""
+ select {schema_madlib}.array_avg(loss_acc, True) as final_loss_acc from
+ (
+ select ({schema_madlib}.internal_keras_evaluate({dependent_varname},
+ {independent_varname},
+ $MAD${model_arch}$MAD$,
+ $1, {compile_params},
+ {use_gpu}, gp_segment_id)) as loss_acc
+ from {table}
+ ) q""".format(**locals()), ["bytea"])
+ res = plpy.execute(evaluate_query, [model_data])
+ loss_acc = res[0]['final_loss_acc']
+ return loss_acc
+
def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
all_seg_ids, total_buffers_per_seg, architecture,
compile_params, fit_params, use_gpu, previous_state,
@@ -304,17 +333,16 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
start_transition = time.time()
SD = kwargs['SD']
- gpus_per_host = 4
# Configure GPUs/CPUs
device_name = get_device_name_for_keras(
- use_gpu, current_seg_id, gpus_per_host)
+ use_gpu, current_seg_id)
# Set up system if this is the first buffer on segment'
if not state:
set_keras_session(use_gpu)
segment_model = model_from_json(architecture)
- SD['model_shapes'] = KerasWeightsSerializer.get_model_shapes(segment_model)
+ SD['model_shapes'] = madlib_keras_serializer.get_model_shapes(segment_model)
compile_and_set_weights(segment_model, compile_params, device_name,
previous_state, SD['model_shapes'])
SD['segment_model'] = segment_model
@@ -323,21 +351,18 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
agg_accuracy = 0
else:
segment_model = SD['segment_model']
- # Since we deserialize everytime, the transition function might be slightly
- # slower
- agg_loss, agg_accuracy, _, _ = KerasWeightsSerializer.deserialize_weights(
+ #TODO we don't need to deserialize the weights here.
+ agg_loss, agg_accuracy, _, _ = madlib_keras_serializer.deserialize_weights(
state, SD['model_shapes'])
- input_shape = get_input_shape(architecture)
-
# Prepare the data
- x_train = np.array(ind_var, dtype='float64').reshape(
- len(ind_var), *input_shape)
+ x_train = np.array(ind_var, dtype='float64')
y_train = np.array(dep_var)
# Fit segment model on data
start_fit = time.time()
with K.tf.device(device_name):
+ #TODO consider not doing this every time
fit_params = parse_fit_params(fit_params)
history = segment_model.fit(x_train, y_train, **fit_params)
loss = history.history['loss'][0]
@@ -362,7 +387,7 @@ def fit_transition(state, ind_var, dep_var, current_seg_id, num_classes,
agg_accuracy /= total_buffers
clear_keras_session()
- new_model_state = KerasWeightsSerializer.serialize_weights(
+ new_model_state = madlib_keras_serializer.serialize_weights(
agg_loss, agg_accuracy, SD['buffer_count'], updated_weights)
del x_train
@@ -380,8 +405,8 @@ def fit_merge(state1, state2, **kwargs):
return state1 or state2
# Deserialize states
- loss1, accuracy1, buffer_count1, weights1 = KerasWeightsSerializer.deserialize_weights_merge(state1)
- loss2, accuracy2, buffer_count2, weights2 = KerasWeightsSerializer.deserialize_weights_merge(state2)
+ loss1, accuracy1, buffer_count1, weights1 = madlib_keras_serializer.deserialize_weights_merge(state1)
+ loss2, accuracy2, buffer_count2, weights2 = madlib_keras_serializer.deserialize_weights_merge(state2)
# plpy.info('merge buffer loss1 {}, accuracy1 {}, buffer count1 {}'.format(loss1, accuracy1, buffer_count1))
# plpy.info('merge buffer loss2 {}, accuracy2 {}, buffer count2 {}'.format(loss2, accuracy2, buffer_count2))
@@ -407,7 +432,7 @@ def fit_merge(state1, state2, **kwargs):
# avg_weights = [(merge_weight1 * e1) + (merge_weight2 * e2) for e1, e2 in zip(weights1, weights2)]
# Return the merged state
- return KerasWeightsSerializer.serialize_weights_merge(
+ return madlib_keras_serializer.serialize_weights_merge(
avg_loss, avg_accuracy, total_buffers, avg_weights)
def fit_final(state, **kwargs):
@@ -441,7 +466,7 @@ def evaluate(schema_madlib, model_table, source_table, id_col,
model_shapes = []
for weight_arr in model.get_weights():
model_shapes.append(weight_arr.shape)
- _, updated_weights = KerasWeightsSerializer.deserialize_weights(
+ _, updated_weights = madlib_keras_serializer.deserialize_weights(
model_data, model_shapes)
model.set_weights(updated_weights)
optimizers = get_optimizers()
@@ -454,9 +479,7 @@ def evaluate(schema_madlib, model_table, source_table, id_col,
input_shape = map(int, input_shape)
x_validation, y_validation = get_data_as_np_array(source_table,
dependent_varname,
- independent_varname,
- input_shape,
- num_classes)
+ independent_varname)
plpy.info('X shape : {0}'.format(x_validation.shape))
plpy.info('Y shape : {0}'.format(y_validation.shape))
@@ -488,30 +511,22 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
model_arch_table, model_arch_id))
query_result = query_result[0]
model_arch = query_result['model_arch']
- input_shape = get_input_shape(model_arch)
compile_params = "$madlib$" + compile_params + "$madlib$"
- # evaluate_query = plpy.prepare("""create table {output_table} as
- evaluate_query = plpy.prepare("""
- select {id_col}, (madlib.internal_keras_evaluate({independent_varname},
- {dependent_varname},
- $MAD${model_arch}$MAD$,
- $1,ARRAY{input_shape},
- {compile_params}))
- from {test_table}""".format(**locals()), ["bytea"])
- plpy.execute(evaluate_query, [model_data])
-
-def internal_keras_evaluate(x_test, y_test, model_arch, model_data, input_shape,
- compile_params):
- device_name = '/cpu:0'
- os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
-
- model = model_from_json(model_arch)
- plpy.info('model in str is {}'.format(str(model)))
- model_shapes = []
- for weight_arr in model.get_weights():
- model_shapes.append(weight_arr.shape)
- _, model_weights = KerasWeightsSerializer.deserialize_weights(
+ loss_acc = get_loss_acc_from_keras_eval(schema_madlib, test_table, dependent_varname,
+ independent_varname, compile_params, model_arch,
+ model_data, False)
+ #TODO remove these infos after adding create table command
+ plpy.info('len of evaluate result is {}'.format(len(loss_acc)))
+ plpy.info('evaluate result loss is {}'.format(loss_acc[0]))
+ plpy.info('evaluate result acc is {}'.format(loss_acc[1]))
+
+def internal_keras_evaluate(dependent_var, independent_var, model_architecture,
+ model_data, compile_params, use_gpu, seg, **kwargs):
+ device_name = get_device_name_for_keras(use_gpu, seg)
+ model = model_from_json(model_architecture)
+ model_shapes = madlib_keras_serializer.get_model_shapes(model)
+ _, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
model_data, model_shapes)
model.set_weights(model_weights)
optimizers = get_optimizers()
@@ -521,11 +536,16 @@ def internal_keras_evaluate(x_test, y_test, model_arch, model_data, input_shape,
loss=compile_dict['loss'],
metrics=compile_dict['metrics'])
- x_test = np.array(x_test).reshape(len(x_test), input_shape[0], input_shape[1],
- input_shape[2])
- x_test = x_test.astype('float32')
- y_test = np.array(y_test)
+ # Since the training data is batched but the validation data isn't, we have
+ # to make sure that the validation data np array has the same no of dimensions
+ # as training data. So we prepend 1 to both x and y np arrays using expand_dims.
+ independent_var = np.array(independent_var)
+ independent_var = np.expand_dims(independent_var, axis=0)
+ independent_var = independent_var.astype('float32')
+
+ dependent_var = np.array(dependent_var)
+ dependent_var = np.expand_dims(dependent_var, axis=0)
+
with K.tf.device(device_name):
- res = model.evaluate(x_test, y_test)
- plpy.info('evaluate result from internal_keras_evaluate is {}'.format(res))
+ res = model.evaluate(independent_var, dependent_var)
return res
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index 9bc462f..9526c91 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -263,17 +263,12 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_evaluate(
independent_var double precision [],
model_architecture TEXT,
model_data bytea,
- input_shape integer[],
- compile_params TEXT
+ compile_params TEXT,
+ use_gpu BOOLEAN,
+ seg INTEGER
) RETURNS DOUBLE PRECISION[] AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
with AOControl(False):
- return madlib_keras.internal_keras_evaluate(
- dependent_var,
- independent_var,
- model_architecture,
- model_data,
- input_shape,
- compile_params)
+ return madlib_keras.internal_keras_evaluate(**globals())
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index 3313dd3..52e7d20 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -1,201 +1,14 @@
-# coding=utf-8
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import os
-import plpy
-
-# Import needed for get_data_as_np_array()
-from keras import utils as keras_utils
-
from utilities.minibatch_validation import validate_dependent_var_for_minibatch
from utilities.utilities import _assert
from utilities.utilities import add_postfix
from utilities.utilities import is_var_valid
+from utilities.utilities import is_valid_psql_type
+from utilities.utilities import NUMERIC
+from utilities.utilities import ONLY_ARRAY
from utilities.validate_args import get_expr_type
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import output_tbl_valid
-
-#######################################################################
-########### Helper functions to serialize and deserialize weights #####
-#######################################################################
-class KerasWeightsSerializer:
-
- @staticmethod
- def get_model_shapes(model):
- model_shapes = []
- for a in model.get_weights():
- model_shapes.append(a.shape)
- return model_shapes
-
- @staticmethod
- def deserialize_weights(model_state, model_shapes):
- """
- Parameters:
- model_state: a stringified (serialized) state containing loss,
- accuracy, buffer_count, and model_weights, passed from postgres
- model_shapes: a list of tuples containing the shapes of each element
- in keras.get_weights()
- Returns:
- buffer_count: the buffer count from state
- model_weights: a list of numpy arrays that can be inputted into keras.set_weights()
- """
- if not model_state or not model_shapes:
- return None
- state = np.fromstring(model_state, dtype=np.float32)
- model_weights_serialized = state[3:]
- i, j, model_weights = 0, 0, []
- while j < len(model_shapes):
- next_pointer = i + reduce(lambda x, y: x * y, model_shapes[j])
- weight_arr_portion = model_weights_serialized[i:next_pointer]
- model_weights.append(weight_arr_portion.reshape(model_shapes[j]))
- i, j = next_pointer, j + 1
- return int(float(state[0])), int(float(state[1])), int(float(state[2])), model_weights
-
- @staticmethod
- def serialize_weights(loss, accuracy, buffer_count, model_weights):
- """
- Parameters:
- loss, accuracy, buffer_count: float values
- model_weights: a list of numpy arrays, what you get from
- keras.get_weights()
- Returns:
- A stringified (serialized) state containing all these values, to be
- passed to postgres
- """
- if model_weights is None:
- return None
- flattened_weights = [w.flatten() for w in model_weights]
- model_weights_serialized = np.concatenate(flattened_weights)
- new_model_string = np.array([loss, accuracy, buffer_count])
- new_model_string = np.concatenate((new_model_string, model_weights_serialized))
- new_model_string = np.float32(new_model_string)
- return new_model_string.tostring()
-
- @staticmethod
- def deserialize_iteration_state(iteration_result):
- """
- Parameters:
- iteration_result: the output of the step function
- Returns:
- loss: the averaged loss from that iteration of training
- accuracy: the averaged accuracy from that iteration of training
- new_model_state: the stringified (serialized) state to pass in to next
- iteration of step function training, represents the averaged weights
- from the last iteration of training; zeros out loss, accuracy,
- buffer_count in this state because the new iteration must start with
- fresh values
- """
- if not iteration_result:
- return None
- state = np.fromstring(iteration_result, dtype=np.float32)
- new_model_string = np.array(state)
- new_model_string[0], new_model_string[1], new_model_string[2] = 0, 0, 0
- new_model_string = np.float32(new_model_string)
- return float(state[0]), float(state[1]), new_model_string.tostring()
-
- @staticmethod
- def deserialize_weights_merge(state):
- """
- Parameters:
- state: the stringified (serialized) state containing loss, accuracy, buffer_count, and
- model_weights, passed from postgres to merge function
- Returns:
- loss: the averaged loss from that iteration of training
- accuracy: the averaged accuracy from that iteration of training
- buffer_count: total buffer counts processed
- model_weights: a single flattened numpy array containing all of the
- weights, flattened because all we have to do is average them (so don't
- have to reshape)
- """
- if not state:
- return None
- state = np.fromstring(state, dtype=np.float32)
- return float(state[0]), float(state[1]), int(float(state[2])), state[3:]
-
- @staticmethod
- def serialize_weights_merge(loss, accuracy, buffer_count, model_weights):
- """
- Parameters:
- loss, accuracy, buffer_count: float values
- model_weights: a single flattened numpy array containing all of the
- weights, averaged in merge function over the 2 states
- Returns:
- A stringified (serialized) state containing all these values, to be
- passed to postgres
- """
- if model_weights is None:
- return None
- new_model_string = np.array([loss, accuracy, buffer_count])
- new_model_string = np.concatenate((new_model_string, model_weights))
- new_model_string = np.float32(new_model_string)
- return new_model_string.tostring()
-
- @staticmethod
- def deserialize_weights_orig(model_weights_serialized, model_shapes):
- """
- Original deserialization for warm-start, used only to parse model received
- from query at the top of this file
- """
- i, j, model_weights = 0, 0, []
- while j < len(model_shapes):
- next_pointer = i + reduce(lambda x, y: x * y, model_shapes[j])
- weight_arr_portion = model_weights_serialized[i:next_pointer]
- model_weights.append(np.array(weight_arr_portion).reshape(model_shapes[j]))
- i, j = next_pointer, j + 1
- return model_weights
-
-
-#######################################################################
-########### General Helper functions #######
-#######################################################################
-
-def get_data_as_np_array(table_name, y, x, input_shape, num_classes):
- """
-
- :param table_name: Table containing the batch of images per row
- :param y: Column name for y
- :param x: Column name for x
- :param input_shape: input_shape of data in array format [L , W , C]
- :param num_classes: num of distinct classes in y
- :return:
- """
- val_data_qry = "SELECT {0}, {1} FROM {2}".format(y, x, table_name)
- input_shape = map(int, input_shape)
- val_data = plpy.execute(val_data_qry)
- indep_len = len(val_data[0][x])
- pixels_per_image = int(input_shape[0] * input_shape[1] * input_shape[2])
- x_validation = np.ndarray((0,indep_len, pixels_per_image))
- y_validation = np.ndarray((0,indep_len, num_classes))
- for i in range(len(val_data)):
- x_test = np.asarray((val_data[i][x],))
- x_test = x_test.reshape(1, indep_len, pixels_per_image)
- y_test = np.asarray((val_data[i][y],))
- x_validation=np.concatenate((x_validation, x_test))
- y_validation=np.concatenate((y_validation, y_test))
- num_test_examples = x_validation.shape[0]
- x_validation = x_validation.reshape(indep_len * num_test_examples, *input_shape)
- x_validation = x_validation.astype('float64')
- y_validation = y_validation.reshape(indep_len * num_test_examples, num_classes)
-
- return x_validation, y_validation
-
+import plpy
CLASS_VALUES_COLNAME = "class_values"
NORMALIZING_CONST_COLNAME = "normalizing_const"
DEPENDENT_VARTYPE = "dependent_vartype"
@@ -252,32 +65,44 @@ class FitInputValidator:
self._validate_input_table(self.source_table)
validate_dependent_var_for_minibatch(self.source_table,
self.dependent_varname)
- if self.validation_table and self.validation_table.strip() != '':
- input_tbl_valid(self.validation_table, self.module_name)
- self._validate_input_table(self.validation_table)
- validate_dependent_var_for_minibatch(self.validation_table,
- self.dependent_varname)
+
+ self._validate_validation_table()
+
# Validate model arch table's schema.
input_tbl_valid(self.model_arch_table, self.module_name)
# Validate output tables
output_tbl_valid(self.output_model_table, self.module_name)
output_tbl_valid(self.output_summary_model_table, self.module_name)
- def validate_input_shapes(self, table, input_shape):
+
+ def _validate_validation_table(self):
+ if self.validation_table and self.validation_table.strip() != '':
+ input_tbl_valid(self.validation_table, self.module_name)
+ self._validate_input_table(self.validation_table)
+ dependent_vartype = get_expr_type(self.dependent_varname,
+ self.validation_table)
+ _assert(is_valid_psql_type(dependent_vartype,
+ NUMERIC | ONLY_ARRAY),
+ "Dependent variable column {0} in validation table {1} should be "
+ "a numeric array and also one hot encoded.".format(
+ self.dependent_varname, self.validation_table))
+
+
+ def validate_input_shapes(self, table, input_shape, offset):
"""
Validate if the input shape specified in model architecture is the same
as the shape of the image specified in the indepedent var of the input
table.
+ offset: The index of the first image dimension in the independent var array,
+ as passed to array_upper(). Note that sql array indexes start from 1.
+ For example, if the image is of shape [32,32,3] and is minibatched, the array will
+ look like [10, 32, 32, 3]. The offset in this case is 1 (start the index at 1) +
+ 1 (ignore the buffer size 10) = 2.
+ If the image is not batched then it will look like [32, 32, 3] and the offset in
+ this case is 1 (start the index at 1).
"""
- # The weird indexing with 'i+2' and 'i' below has two reasons:
- # 1) The indexing for array_upper() starts from 1, but indexing in the
- # input_shape list starts from 0.
- # 2) Input_shape is only the image's dimension, whereas a row of
- # independent varname in a table contains buffer size as the first
- # dimension, followed by the image's dimension. So we must ignore
- # the first dimension from independent varname.
array_upper_query = ", ".join("array_upper({0}, {1}) AS n_{2}".format(
- self.independent_varname, i+2, i) for i in range(len(input_shape)))
+ self.independent_varname, i+offset, i) for i in range(len(input_shape)))
query = """
SELECT {0}
FROM {1}
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 85cd6c3..d89d703 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -29,13 +29,13 @@ import numpy as np
from utilities.model_arch_info import get_input_shape
from utilities.utilities import add_postfix
+from utilities.validate_args import get_col_value_and_type
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import output_tbl_valid
+from madlib_keras_helper import CLASS_VALUES_COLNAME
from madlib_keras_wrapper import compile_and_set_weights
-from madlib_keras_wrapper import convert_string_of_args_to_dict
-from madlib_keras_helper import CLASS_VALUES_COLNAME
-from madlib_keras_helper import KerasWeightsSerializer
+import madlib_keras_serializer
MODULE_NAME = 'madlib_keras_predict'
def predict(schema_madlib, model_table, test_table, id_col,
@@ -69,8 +69,8 @@ def predict(schema_madlib, model_table, test_table, id_col,
input_shape = get_input_shape(model_arch)
compile_params = "$madlib$" + compile_params + "$madlib$"
model_summary_table = add_postfix(model_table, "_summary")
- class_values = plpy.execute("SELECT {0} AS cv FROM {1}".format(
- CLASS_VALUES_COLNAME, model_summary_table))[0]['cv']
+ class_values, _ = get_col_value_and_type(model_summary_table,
+ CLASS_VALUES_COLNAME)
predict_query = plpy.prepare("""
CREATE TABLE {output_table} AS
SELECT {id_col},
@@ -89,7 +89,7 @@ def internal_keras_predict(x_test, model_arch, model_data, input_shape,
model = model_from_json(model_arch)
device_name = '/cpu:0'
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
- model_shapes = KerasWeightsSerializer.get_model_shapes(model)
+ model_shapes = madlib_keras_serializer.get_model_shapes(model)
compile_and_set_weights(model, compile_params, device_name,
model_data, model_shapes)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in
new file mode 100644
index 0000000..d52ed16
--- /dev/null
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in
@@ -0,0 +1,153 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+
+def get_model_shapes(model):
+ model_shapes = []
+ for a in model.get_weights():
+ model_shapes.append(a.shape)
+ return model_shapes
+
+# TODO
+# Current serializing logic
+# serialized string -> byte string
+# np.array(np.array(loss, acc, buff_count).concatenate(weights_np_array)).tostring()
+
+# Proposed logic
+# loss , accuracy and buffer_count can be comma separated values
+# weights -> np.array.tostring()
+# combine these 2 into one string by a random splitter
+# serialized string -> loss_splitter_acc_splitter_buffer_splitter_weights
+
+def deserialize_weights(model_state, model_shapes):
+ """
+ Parameters:
+ model_state: a stringified (serialized) state containing loss,
+ accuracy, buffer_count, and model_weights, passed from postgres
+ model_shapes: a list of tuples containing the shapes of each element
+ in keras.get_weights()
+ Returns (None if model_state or model_shapes is None or empty):
+ loss, accuracy, buffer_count: the first three values of the state, each cast to int
+ model_weights: a list of numpy arrays that can be inputted into keras.set_weights()
+ """
+ if not model_state or not model_shapes:
+ return None
+ state = np.fromstring(model_state, dtype=np.float32)
+ model_weights_serialized = state[3:]
+ i, j, model_weights = 0, 0, []
+ while j < len(model_shapes):
+ next_pointer = i + reduce(lambda x, y: x * y, model_shapes[j])
+ weight_arr_portion = model_weights_serialized[i:next_pointer]
+ model_weights.append(weight_arr_portion.reshape(model_shapes[j]))
+ i, j = next_pointer, j + 1
+ return int(float(state[0])), int(float(state[1])), int(float(state[2])), model_weights
+
+
+def serialize_weights(loss, accuracy, buffer_count, model_weights):
+ """
+ Parameters:
+ loss, accuracy, buffer_count: float values
+ model_weights: a list of numpy arrays, what you get from
+ keras.get_weights()
+ Returns:
+ A stringified (serialized) state containing all these values, to be
+ passed to postgres
+ """
+ if model_weights is None:
+ return None
+ flattened_weights = [w.flatten() for w in model_weights]
+ model_weights_serialized = np.concatenate(flattened_weights)
+ new_model_string = np.array([loss, accuracy, buffer_count])
+ new_model_string = np.concatenate((new_model_string, model_weights_serialized))
+ new_model_string = np.float32(new_model_string)
+ return new_model_string.tostring()
+
+
+def deserialize_iteration_state(iteration_result):
+ """
+ Parameters:
+ iteration_result: the output of the step function
+ Returns:
+ loss: the averaged loss from that iteration of training
+ accuracy: the averaged accuracy from that iteration of training
+ new_model_state: the stringified (serialized) state to pass in to next
+ iteration of step function training, represents the averaged weights
+ from the last iteration of training; zeros out loss, accuracy,
+ buffer_count in this state because the new iteration must start with
+ fresh values
+ """
+ if not iteration_result:
+ return None
+ state = np.fromstring(iteration_result, dtype=np.float32)
+ new_model_string = np.array(state)
+ new_model_string[0], new_model_string[1], new_model_string[2] = 0, 0, 0
+ new_model_string = np.float32(new_model_string)
+ return float(state[0]), float(state[1]), new_model_string.tostring()
+
+
+def deserialize_weights_merge(state):
+ """
+ Parameters:
+ state: the stringified (serialized) state containing loss, accuracy, buffer_count, and
+ model_weights, passed from postgres to merge function
+ Returns:
+ loss: the averaged loss from that iteration of training
+ accuracy: the averaged accuracy from that iteration of training
+ buffer_count: total buffer counts processed
+ model_weights: a single flattened numpy array containing all of the
+ weights, flattened because all we have to do is average them (so don't
+ have to reshape)
+ """
+ if not state:
+ return None
+ state = np.fromstring(state, dtype=np.float32)
+ return float(state[0]), float(state[1]), int(float(state[2])), state[3:]
+
+
+def serialize_weights_merge(loss, accuracy, buffer_count, model_weights):
+ """
+ Parameters:
+ loss, accuracy, buffer_count: float values
+ model_weights: a single flattened numpy array containing all of the
+ weights, averaged in merge function over the 2 states
+ Returns:
+ A stringified (serialized) state containing all these values, to be
+ passed to postgres
+ """
+ if model_weights is None:
+ return None
+ new_model_string = np.array([loss, accuracy, buffer_count])
+ new_model_string = np.concatenate((new_model_string, model_weights))
+ new_model_string = np.float32(new_model_string)
+ return new_model_string.tostring()
+
+
+def deserialize_weights_orig(model_weights_serialized, model_shapes):
+ """
+ Original deserialization for warm-start, used only to parse model received
+ from query at the top of this file
+ """
+ i, j, model_weights = 0, 0, []
+ while j < len(model_shapes):
+ next_pointer = i + reduce(lambda x, y: x * y, model_shapes[j])
+ weight_arr_portion = model_weights_serialized[i:next_pointer]
+ model_weights.append(np.array(weight_arr_portion).reshape(model_shapes[j]))
+ i, j = next_pointer, j + 1
+ return model_weights
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 6f4706b..a9ebcef 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -17,25 +17,26 @@
# specific language governing permissions and limitations
# under the License.
-import numpy as np
-import os
-import plpy
-
import ast
+import os
+# Do not remove `import keras` although it's not directly used in this file.
+# See madlib_keras.py_in for more details
+import keras
from keras import backend as K
from keras import utils as keras_utils
from keras.optimizers import *
import keras.optimizers as opt
-from madlib_keras_helper import KerasWeightsSerializer
+import madlib_keras_serializer
from utilities.utilities import _assert
#######################################################################
########### Keras specific functions #####
#######################################################################
-def get_device_name_for_keras(use_gpu, seg, gpus_per_host):
+def get_device_name_for_keras(use_gpu, seg):
+ gpus_per_host = 4
if use_gpu:
device_name = '/gpu:0'
os.environ["CUDA_VISIBLE_DEVICES"] = str(seg % gpus_per_host)
@@ -68,7 +69,7 @@ def compile_and_set_weights(segment_model, compile_params, device_name,
segment_model.compile(optimizer=optimizers[opt_name](**final_args),
loss=compile_dict['loss'],
metrics=compile_dict['metrics'])
- _, _, _, model_weights = KerasWeightsSerializer.deserialize_weights(
+ _, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
previous_state, model_shapes)
segment_model.set_weights(model_weights)
@@ -87,7 +88,6 @@ def convert_string_of_args_to_dict(str_of_args):
}
result_str = ""
key_str = ""
- value_str = ""
compile_dict = {}
for char in str_of_args:
if char in dual.keys():
@@ -107,7 +107,6 @@ def convert_string_of_args_to_dict(str_of_args):
else:
result_str += char
value_str = result_str
- result_str = ""
compile_dict[key_str.strip()]=value_str.strip('\'')
return compile_dict
@@ -139,7 +138,7 @@ def parse_fit_params(str_of_args):
compile_dict[key] = ast.literal_eval(compile_dict[key])
return compile_dict
-# Split and strip the whispace of key=value formatted strings
+# Split and strip the whitespace of key=value formatted strings
def split_and_strip(x):
y = x.split('=')
return (y[0].strip(),y[1].strip())
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index ca6f9b6..ceb3d67 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------- *//**
+/* ---------------------------------------------------------------------*//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,20 +17,27 @@
* specific language governing permissions and limitations
* under the License.
*
- *//* ----------------------------------------------------------------------- */
+ *//* ---------------------------------------------------------------------*/
drop table if exists cifar_10_sample;
-create table cifar_10_sample(
- id INTEGER,
- x REAL[],
- y SMALLINT,
- imgpath TEXT
- );
+create table cifar_10_sample(id INTEGER, y SMALLINT, imgpath TEXT, x REAL[]);
copy cifar_10_sample from stdin delimiter '|';
-1|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{206,208,203},{207,209,2 [...]
-2|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119,112,104},{121,114,10 [...]
+1|0|'0/img0.jpg'|{{{202,204,199},{202,204,199},{204,206,201},{206,208,203},{208,210,205},{209,211,206},{210,212,207},{212,214,210},{213,215,212},{215,217,214},{216,218,215},{216,218,215},{215,217,214},{216,218,215},{216,218,215},{216,218,214},{217,219,214},{217,219,214},{218,220,215},{218,219,214},{216,217,212},{217,218,213},{218,219,214},{214,215,209},{213,214,207},{212,213,206},{211,212,205},{209,210,203},{208,209,202},{207,208,200},{205,206,199},{203,204,198}},{{206,208,203},{206,208, [...]
+2|1|'0/img2.jpg'|{{{126,118,110},{122,115,108},{126,119,111},{127,119,109},{130,122,111},{130,122,111},{132,124,113},{133,125,114},{130,122,111},{132,124,113},{134,126,115},{131,123,112},{131,123,112},{134,126,115},{133,125,114},{136,128,117},{137,129,118},{137,129,118},{136,128,117},{131,123,112},{130,122,111},{132,124,113},{132,124,113},{132,124,113},{129,122,110},{127,121,109},{127,121,109},{125,119,107},{124,118,106},{124,118,106},{120,114,102},{117,111,99}},{{122,115,107},{119,112,1 [...]
\.
+drop table if exists cifar_10_sample_val;
+create table cifar_10_sample_val(id INTEGER,dependent_var SMALLINT[], independent_var REAL[] );
+copy cifar_10_sample_val from stdin delimiter '|';
+1|{0,1}|{{{248,248,250},{245,245,246},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247},{245,245,247}},{{248,248,250},{245,245,247},{245 [...]
+2|{1,0}|{{{103,115,158},{102,114,156},{102,114,156},{103,115,155},{103,115,155},{103,115,154},{103,115,155},{104,116,157},{104,116,158},{104,115,157},{103,115,157},{104,115,158},{103,114,157},{103,114,156},{106,117,157},{104,115,156},{103,114,156},{104,114,157},{103,114,157},{108,117,154},{104,110,147},{78,83,124},{98,108,150},{104,115,158},{107,115,155},{123,132,163},{138,145,172},{138,144,172},{141,145,173},{128,132,163},{106,114,152},{102,113,155}},{{104,116,158},{103,115,157},{103,11 [...]
+\.
+-- normalize the indep variable
+-- TODO Calling this function makes keras.fit fail with the exception (investigate later)
+-- NOTICE: Releasing segworker groups to finish aborting the transaction.
+-- ERROR: could not connect to segment: initialization of segworker group failed (cdbgang.c:237)
+--update cifar_10_sample_val SET independent_var = array_scalar_mult(independent_var::real[], (1/255.0)::real);
+
DROP TABLE IF EXISTS cifar_10_sample_batched;
DROP TABLE IF EXISTS cifar_10_sample_batched_summary;
SELECT minibatch_preprocessor_dl('cifar_10_sample','cifar_10_sample_batched','y','x', 2, 255);
@@ -73,7 +80,7 @@ SELECT madlib_keras_fit(
$$ batch_size=2, epochs=1, verbose=0 $$::text,
3,
FALSE,
- 'cifar_10_sample_batched');
+ 'cifar_10_sample_val');
SELECT assert(
model_arch_table = 'model_arch' AND
@@ -82,7 +89,7 @@ SELECT assert(
start_training_time < now() AND
end_training_time > start_training_time AND
source_table = 'cifar_10_sample_batched' AND
- validation_table = 'cifar_10_sample_batched' AND
+ validation_table = 'cifar_10_sample_val' AND
model = 'keras_saved_out' AND
dependent_varname = 'dependent_var' AND
dependent_vartype = 'smallint' AND
@@ -97,22 +104,18 @@ SELECT assert(
num_iterations = 3 AND
num_classes = 2 AND
class_values = '{0,1}' AND
- accuracy is not NULL AND
- loss is not NULL AND
+ accuracy >= 0 AND
+ loss >= 0 AND
array_upper(accuracy_iter, 1) = 3 AND
array_upper(loss_iter, 1) = 3 AND
array_upper(time_iter, 1) = 3 AND
- accuracy_validation is not NULL AND
- loss_validation is not NULL AND
+ accuracy_validation >= 0 AND
+ loss_validation >= 0 AND
array_upper(accuracy_iter_validation, 1) = 3 AND
array_upper(loss_iter_validation, 1) = 3 ,
'Keras model output Summary Validation failed. Actual:' || __to_char(summary))
FROM (SELECT * FROM keras_saved_out_summary) summary;
-SELECT assert(accuracy_validation > 0.9999,
- 'Validation accuracy after 3 iterations of training is only ' || __to_char(100*accuracy) || '%, should have reached 100%')
- FROM keras_saved_out_summary;
-
SELECT assert(model_data IS NOT NULL , 'Keras model output validation failed') FROM (SELECT * FROM keras_saved_out) k;
@@ -129,7 +132,7 @@ SELECT madlib_keras_fit(
1,
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
- 1,
+ 2,
FALSE,
NULL,
'model name', 'model desc');
@@ -140,7 +143,7 @@ SELECT assert(
start_training_time < now() AND
end_training_time > start_training_time AND
source_table = 'cifar_10_sample_batched' AND
- validation_table = 'cifar_10_sample_batched' AND
+ validation_table is NULL AND
model = 'keras_out' AND
dependent_varname = 'dependent_var' AND
independent_varname = 'independent_var' AND
@@ -150,14 +153,14 @@ SELECT assert(
madlib_version is NOT NULL AND
compile_params = $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text AND
fit_params = $$ batch_size=2, epochs=1, verbose=0 $$::text AND
- num_iterations = 1 AND
+ num_iterations = 2 AND
num_classes = 2 AND
class_values = '{0,1}' AND
accuracy is not NULL AND
loss is not NULL AND
- array_upper(accuracy_iter, 1) = 1 AND
- array_upper(loss_iter, 1) = 1 AND
- array_upper(time_iter, 1) = 1 AND
+ array_upper(accuracy_iter, 1) = 2 AND
+ array_upper(loss_iter, 1) = 2 AND
+ array_upper(time_iter, 1) = 2 AND
accuracy_validation is NULL AND
loss_validation is NULL AND
array_upper(accuracy_iter_validation,1) = 0 AND
@@ -202,3 +205,23 @@ select assert(trap_error($TRAP$madlib_keras_predict(
'x',
'cifar10_predict');$TRAP$) = 1,
'Passing batched image table to predict should error out.');
+
+-- negative test case: y in the validation table must be a numeric array
+-- induce failure by renaming the integer (non-array) id column to dependent_var
+create table cifar_10_sample_val_failure as select * from cifar_10_sample_val;
+alter table cifar_10_sample_val_failure rename dependent_var to dependent_var_original;
+alter table cifar_10_sample_val_failure rename id to dependent_var;
+DROP TABLE IF EXISTS keras_out, keras_out_summary;
+select assert(trap_error($TRAP$madlib_keras_fit(
+ 'cifar_10_sample_batched',
+ 'keras_out',
+ 'dependent_var',
+ 'independent_var',
+ 'model_arch',
+ 1,
+ $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
+ $$ batch_size=2, epochs=1, verbose=0 $$::text,
+ 2,
+ FALSE,
+ 'cifar_10_sample_val_failure');$TRAP$) = 1,
+ 'Passing y of type non numeric array to fit should error out.');
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 84bad08..059bd11 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -30,8 +30,6 @@ from keras.layers import *
import unittest
from mock import *
import plpy_mock as plpy
-from keras.models import *
-from keras.layers import *
m4_changequote(`<!', `!>')
@@ -82,7 +80,7 @@ class MadlibKerasFitTestCase(unittest.TestCase):
k = {'SD': {'buffer_count': buffer_count}}
new_model_state = self.subject.fit_transition(
- None, [[0.5]] , [[0,1]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
+ None, [[[[0.5]]]] , [[0,1]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
self.model.to_json(), self.compile_params, self.fit_params, False,
previous_state.tostring(), **k)
buffer_count = np.fromstring(new_model_state, dtype=np.float32)[2]
@@ -113,7 +111,7 @@ class MadlibKerasFitTestCase(unittest.TestCase):
'model_shapes': self.model_shapes}}
k['SD']['segment_model'] = self.model
new_model_state = self.subject.fit_transition(
- state.tostring(), [[0.5]] , [[1,0]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
+ state.tostring(), [[[[0.5]]]] , [[1,0]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
self.model.to_json(), None, self.fit_params, False, 'dummy_previous_state', **k)
buffer_count = np.fromstring(new_model_state, dtype=np.float32)[2]
@@ -142,7 +140,7 @@ class MadlibKerasFitTestCase(unittest.TestCase):
'model_shapes': self.model_shapes}}
k['SD']['segment_model'] = self.model
new_model_state = self.subject.fit_transition(
- state.tostring(), [[0.5]] , [[0,1]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
+ state.tostring(), [[[[0.5]]]] , [[0,1]], 1, 2, self.all_seg_ids, self.total_buffers_per_seg,
self.model.to_json(), None, self.fit_params, False, 'dummy_previous_state', **k)
buffer_count = np.fromstring(new_model_state, dtype=np.float32)[2]
@@ -156,10 +154,10 @@ class MadlibKerasFitTestCase(unittest.TestCase):
def test_get_device_name_for_keras(self):
import os
self.assertEqual('/gpu:0', self.subject.get_device_name_for_keras(
- True, 1, 3))
+ True, 1))
self.assertEqual('1', os.environ["CUDA_VISIBLE_DEVICES"])
self.assertEqual('/cpu:0', self.subject.get_device_name_for_keras(
- False, 1, 3))
+ False, 1))
self.assertEqual('-1', os.environ["CUDA_VISIBLE_DEVICES"])
def test_fit_transition_first_tuple_none_ind_var_dep_var(self):
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
similarity index 52%
rename from src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_helper.py_in
rename to src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
index 61439f1..6844327 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
@@ -24,7 +24,6 @@ from os import path
sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
-from keras import utils as keras_utils
from keras.models import *
from keras.layers import *
@@ -46,8 +45,8 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
self.module_patcher = patch.dict('sys.modules', patches)
self.module_patcher.start()
- import madlib_keras_helper
- self.subject = madlib_keras_helper
+ import madlib_keras_serializer
+ self.subject = madlib_keras_serializer
self.model = Sequential()
self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
@@ -66,11 +65,11 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
self.module_patcher.stop()
def test_deserialize_weights_merge_null_state_returns_none(self):
- self.assertEqual(None, self.subject.KerasWeightsSerializer.deserialize_weights_merge(None))
+ self.assertEqual(None, self.subject.deserialize_weights_merge(None))
def test_deserialize_weights_merge_returns_not_none(self):
dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
- res = self.subject.KerasWeightsSerializer.deserialize_weights_merge(dummy_model_state.tostring())
+ res = self.subject.deserialize_weights_merge(dummy_model_state.tostring())
self.assertEqual(0, res[0])
self.assertEqual(1, res[1])
self.assertEqual(2, res[2])
@@ -78,14 +77,14 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
def test_deserialize_weights_null_input_returns_none(self):
dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
- self.assertEqual(None, self.subject.KerasWeightsSerializer.deserialize_weights(dummy_model_state.tostring(), None))
- self.assertEqual(None, self.subject.KerasWeightsSerializer.deserialize_weights(None, [1,2,3]))
- self.assertEqual(None, self.subject.KerasWeightsSerializer.deserialize_weights(None, None))
+ self.assertEqual(None, self.subject.deserialize_weights(dummy_model_state.tostring(), None))
+ self.assertEqual(None, self.subject.deserialize_weights(None, [1,2,3]))
+ self.assertEqual(None, self.subject.deserialize_weights(None, None))
def test_deserialize_weights_valid_input_returns_not_none(self):
dummy_model_state = np.array([0,1,2,3,4,5], dtype=np.float32)
dummy_model_shape = [(2, 1, 1, 1), (1,)]
- res = self.subject.KerasWeightsSerializer.deserialize_weights(dummy_model_state.tostring(), dummy_model_shape)
+ res = self.subject.deserialize_weights(dummy_model_state.tostring(), dummy_model_shape)
self.assertEqual(0, res[0])
self.assertEqual(1, res[1])
self.assertEqual(2, res[2])
@@ -100,21 +99,21 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
# we expect keras failure(ValueError) because we cannot reshape
# model weights of size 0 into shape (2,2,3,1)
with self.assertRaises(ValueError):
- self.subject.KerasWeightsSerializer.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
+ self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
invalid_model_state = np.array([0,1,2,3,4], dtype=np.float32)
dummy_model_shape = [(2, 2, 3, 1), (1,)]
# we expect keras failure(ValueError) because we cannot reshape
# model weights of size 2 into shape (2,2,3,1)
with self.assertRaises(ValueError):
- self.subject.KerasWeightsSerializer.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
+ self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
def test_deserialize_iteration_state_none_input_returns_none(self):
- self.assertEqual(None, self.subject.KerasWeightsSerializer.deserialize_iteration_state(None))
+ self.assertEqual(None, self.subject.deserialize_iteration_state(None))
def test_deserialize_iteration_state_returns_valid_output(self):
dummy_iteration_state = np.array([0,1,2,3,4,5], dtype=np.float32)
- res = self.subject.KerasWeightsSerializer.deserialize_iteration_state(
+ res = self.subject.deserialize_iteration_state(
dummy_iteration_state.tostring())
self.assertEqual(0, res[0])
self.assertEqual(1, res[1])
@@ -122,96 +121,87 @@ class MadlibKerasHelperTestCase(unittest.TestCase):
np.array([0,0,0,3,4,5], dtype=np.float32).tostring())
def test_serialize_weights_none_weights_returns_none(self):
- res = self.subject.KerasWeightsSerializer.serialize_weights(0,1,2,None)
+ res = self.subject.serialize_weights(0,1,2,None)
self.assertEqual(None , res)
def test_serialize_weights_valid_output(self):
- res = self.subject.KerasWeightsSerializer.serialize_weights(0,1,2,[np.array([1,3]),
+ res = self.subject.serialize_weights(0,1,2,[np.array([1,3]),
np.array([4,5])])
self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
res)
def test_serialize_weights_merge_none_weights_returns_none(self):
- res = self.subject.KerasWeightsSerializer.serialize_weights_merge(0,1,2,None)
+ res = self.subject.serialize_weights_merge(0,1,2,None)
self.assertEqual(None , res)
def test_serialize_weights_merge_valid_output(self):
- res = self.subject.KerasWeightsSerializer.serialize_weights_merge(0,1,2,np.array([1,3,4,5]))
+ res = self.subject.serialize_weights_merge(0,1,2,np.array([1,3,4,5]))
self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
res)
- def test_get_data_as_np_array_one_image_per_row(self):
- self.plpy_mock_execute.return_value = [{'x': [[1,2]], 'y': [[1, 0, 0]]},
- {'x': [[5,6]], 'y': [[0, 1, 0]]}]
- x_res, y_res = self.subject.get_data_as_np_array('foo','y','x', [1,1,2],
- 3)
- self.assertEqual(np.array([[[[1, 2]]], [[[5, 6]]]]).tolist(),
- x_res.tolist())
- self.assertEqual(np.array([[1, 0, 0], [0, 1, 0]]).tolist(),
- y_res.tolist())
-
- def test_get_data_as_np_array_multiple_images_per_row(self):
- self.plpy_mock_execute.return_value = [{'x': [[1,2], [3,4]], 'y': [[1,0,0],[0,0,1]]},
- {'x': [[5,6], [7,8]], 'y': [[0,1,0],[1,0,0]]}]
- x_res, y_res = self.subject.get_data_as_np_array('foo','y','x', [1,1,2],
- 3)
- self.assertEqual(np.array([[[[1,2]]], [[[3,4]]],
- [[[5,6]]], [[[7,8]]]]).tolist(),
- x_res.tolist())
- self.assertEqual(np.array([[1,0,0], [0,0,1] ,
- [0,1,0], [1,0,0]]).tolist(),
- y_res.tolist())
-
- def test_get_data_as_np_array_float_input_shape(self):
- self.plpy_mock_execute.return_value = [{'x': [[1,2]], 'y': [[1, 0, 0]]},
- {'x': [[5,6]], 'y': [[0, 1, 0]]}]
- x_res, y_res = self.subject.get_data_as_np_array('foo','y','x',
- [1.5,1.9,2.3], 3)
- self.assertEqual(np.array([[[[1, 2]]], [[[5, 6]]]]).tolist(),
- x_res.tolist())
- self.assertEqual(np.array([[1, 0, 0], [0, 1, 0]]).tolist(),
- y_res.tolist())
-
- def test_get_data_as_np_array_invalid_input_shape(self):
- self.plpy_mock_execute.return_value = [{'x': [[1,2]], 'y': [[1, 0, 0]]},
- {'x': [[5,6]], 'y': [[0, 1, 0]]}]
- # we expect keras failure(ValueError) because we cannot reshape
- # the input which is of size 2 to input shape of 1,1,3
- with self.assertRaises(ValueError):
- self.subject.get_data_as_np_array('foo','y','x', [1,1,3], 3)
+class MadlibSerializerTestCase(unittest.TestCase):
+ def setUp(self):
+ self.plpy_mock = Mock(spec='error')
+ patches = {
+ 'plpy': plpy
+ }
+
+ self.plpy_mock_execute = MagicMock()
+ plpy.execute = self.plpy_mock_execute
+
+ self.module_patcher = patch.dict('sys.modules', patches)
+ self.module_patcher.start()
+ from madlib_keras_helper import FitInputValidator
+ self.subject = FitInputValidator
+
+ self.model = Sequential()
+ self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
+ input_shape=(1,1,1,), padding='same'))
+ self.model.add(Flatten())
+
+ self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
+ self.fit_params = "'batch_size'=1, 'epochs'=1"
+ self.model_weights = [3,4,5,6]
+ self.loss = 1.3
+ self.accuracy = 0.34
+ self.all_seg_ids = [0,1,2]
+ self.total_buffers_per_seg = [3,3,3]
+
+ def tearDown(self):
+ self.module_patcher.stop()
def test_validate_input_shapes_shapes_do_not_match(self):
self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32}]
- self.subject.FitInputValidator._validate_input_args = Mock()
- input_validator_obj = self.subject.FitInputValidator('foo',
- 'foo_valid',
- 'model',
- 'model_arch_table',
- 'dependent_varname',
- 'independent_varname',
- 1)
+ self.subject._validate_input_args = Mock()
+ input_validator_obj = self.subject('foo',
+ 'foo_valid',
+ 'model',
+ 'model_arch_table',
+ 'dependent_varname',
+ 'independent_varname',
+ 1)
with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3])
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': 32, 'n_2': 32}]
with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3])
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': None, 'n_2': None}]
with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [3,32])
+ input_validator_obj.validate_input_shapes('dummy_tbl', [3,32], 2)
def test_validate_input_shapes_shapes_match(self):
self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32, 'n_2': 3}]
- self.subject.FitInputValidator._validate_input_args = Mock()
- input_validator_obj = self.subject.FitInputValidator('foo',
- 'foo_valid',
- 'model',
- 'model_arch_table',
- 'dependent_varname',
- 'independent_varname',
- 1)
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3])
+ self.subject._validate_input_args = Mock()
+ input_validator_obj = self.subject('foo',
+ 'foo_valid',
+ 'model',
+ 'model_arch_table',
+ 'dependent_varname',
+ 'independent_varname',
+ 1)
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 1)
if __name__ == '__main__':
unittest.main()
diff --git a/src/ports/postgres/modules/utilities/minibatch_validation.py_in b/src/ports/postgres/modules/utilities/minibatch_validation.py_in
index 5c77c2c..1c92a13 100644
--- a/src/ports/postgres/modules/utilities/minibatch_validation.py_in
+++ b/src/ports/postgres/modules/utilities/minibatch_validation.py_in
@@ -29,12 +29,14 @@ def validate_dependent_var_for_minibatch(table_name, var_name, expr_type=None):
if not expr_type:
expr_type = get_expr_type(var_name, table_name)
_assert(is_valid_psql_type(expr_type, NUMERIC | ONLY_ARRAY),
- "Dependent variable column should be a numeric array.")
+ "Dependent variable column {0} in table {1} "
+ "should be a numeric array.".format(var_name, table_name))
query = """SELECT array_upper({var_name}, 2) > 1 AS is_encoded FROM
{table_name} LIMIT 1;""".format(**locals())
result = plpy.execute(query)
if not result[0]["is_encoded"]:
- plpy.error("MiniBatch expects the variable {0} to be one hot encoded."
- " You might need to re run the minibatch_preprocessor function"
- " and make sure that the variable is encoded".format(var_name))
+ plpy.error("Dependent variable column {0} in table {1} should be "
+ "minibatched and one hot encoded.You might need to re run "
+ "the minibatch_preprocessor function and make sure that "
+ "the variable is encoded".format(var_name, table_name))
[madlib] 05/05: DL: Rename helper to validator and move all unit
tests to madlib_keras.py_in
Posted by nk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit c4f8349d6431d88b583d45053e053b7206ad90f8
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Apr 10 17:03:57 2019 -0700
DL: Rename helper to validator and move all unit tests to madlib_keras.py_in
JIRA: MADLIB-1304
Closes #367
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
.../modules/deep_learning/madlib_keras.py_in | 8 +-
.../deep_learning/madlib_keras_predict.py_in | 2 +-
...s_helper.py_in => madlib_keras_validator.py_in} | 0
.../test/unit_tests/test_madlib_keras.py_in | 169 +++++++++++++++++
.../unit_tests/test_madlib_keras_serializer.py_in | 207 ---------------------
5 files changed, 174 insertions(+), 212 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index ea77d2e..6add8ba 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -36,10 +36,10 @@ from keras.models import *
from keras.optimizers import *
from keras.regularizers import *
import madlib_keras_serializer
-from madlib_keras_helper import CLASS_VALUES_COLNAME
-from madlib_keras_helper import DEPENDENT_VARTYPE
-from madlib_keras_helper import NORMALIZING_CONST_COLNAME
-from madlib_keras_helper import FitInputValidator
+from madlib_keras_validator import CLASS_VALUES_COLNAME
+from madlib_keras_validator import DEPENDENT_VARTYPE
+from madlib_keras_validator import NORMALIZING_CONST_COLNAME
+from madlib_keras_validator import FitInputValidator
from madlib_keras_wrapper import *
from keras_model_arch_table import Format
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 1475a0f..1180d33 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -32,7 +32,7 @@ from utilities.utilities import add_postfix
from utilities.validate_args import get_col_value_and_type
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import output_tbl_valid
-from madlib_keras_helper import CLASS_VALUES_COLNAME
+from madlib_keras_validator import CLASS_VALUES_COLNAME
from keras_model_arch_table import Format
from madlib_keras_wrapper import compile_and_set_weights
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
similarity index 100%
rename from src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
rename to src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 8ca4958..c152c53 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -175,6 +175,175 @@ class MadlibKerasFitTestCase(unittest.TestCase):
[0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", False,
'dummy_prev_state', **k))
+class MadlibKerasValidatorTestCase(unittest.TestCase):
+ def setUp(self):
+ self.plpy_mock = Mock(spec='error')
+ patches = {
+ 'plpy': plpy
+ }
+
+ self.plpy_mock_execute = MagicMock()
+ plpy.execute = self.plpy_mock_execute
+
+ self.module_patcher = patch.dict('sys.modules', patches)
+ self.module_patcher.start()
+ from madlib_keras_validator import FitInputValidator
+ self.subject = FitInputValidator
+
+ self.model = Sequential()
+ self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
+ input_shape=(1,1,1,), padding='same'))
+ self.model.add(Flatten())
+
+ self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
+ self.fit_params = "'batch_size'=1, 'epochs'=1"
+ self.model_weights = [3,4,5,6]
+ self.loss = 1.3
+ self.accuracy = 0.34
+ self.all_seg_ids = [0,1,2]
+ self.total_buffers_per_seg = [3,3,3]
+
+ def tearDown(self):
+ self.module_patcher.stop()
+
+ def test_validate_input_shapes_shapes_do_not_match(self):
+ self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32}]
+ self.subject._validate_input_args = Mock()
+ input_validator_obj = self.subject('foo',
+ 'foo_valid',
+ 'model',
+ 'model_arch_table',
+ 'dependent_varname',
+ 'independent_varname',
+ 1)
+ with self.assertRaises(plpy.PLPYException):
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
+
+ self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': 32, 'n_2': 32}]
+ with self.assertRaises(plpy.PLPYException):
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
+
+ self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': None, 'n_2': None}]
+ with self.assertRaises(plpy.PLPYException):
+ input_validator_obj.validate_input_shapes('dummy_tbl', [3,32], 2)
+
+ def test_validate_input_shapes_shapes_match(self):
+ self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32, 'n_2': 3}]
+ self.subject._validate_input_args = Mock()
+ input_validator_obj = self.subject('foo',
+ 'foo_valid',
+ 'model',
+ 'model_arch_table',
+ 'dependent_varname',
+ 'independent_varname',
+ 1)
+ input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 1)
+
+class MadlibSerializerTestCase(unittest.TestCase):
+ def setUp(self):
+ self.plpy_mock = Mock(spec='error')
+ patches = {
+ 'plpy': plpy
+ }
+
+ self.plpy_mock_execute = MagicMock()
+ plpy.execute = self.plpy_mock_execute
+
+ self.module_patcher = patch.dict('sys.modules', patches)
+ self.module_patcher.start()
+ import madlib_keras_serializer
+ self.subject = madlib_keras_serializer
+
+ self.model = Sequential()
+ self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
+ input_shape=(1,1,1,), padding='same'))
+ self.model.add(Flatten())
+
+ self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
+ self.fit_params = "'batch_size'=1, 'epochs'=1"
+ self.model_weights = [3,4,5,6]
+ self.loss = 1.3
+ self.accuracy = 0.34
+ self.all_seg_ids = [0,1,2]
+ self.total_buffers_per_seg = [3,3,3]
+
+ def tearDown(self):
+ self.module_patcher.stop()
+
+ def test_deserialize_weights_merge_null_state_returns_none(self):
+ self.assertEqual(None, self.subject.deserialize_weights_merge(None))
+
+ def test_deserialize_weights_merge_returns_not_none(self):
+ dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
+ res = self.subject.deserialize_weights_merge(dummy_model_state.tostring())
+ self.assertEqual(0, res[0])
+ self.assertEqual(1, res[1])
+ self.assertEqual(2, res[2])
+ self.assertEqual([3,4,5,6], res[3].tolist())
+
+ def test_deserialize_weights_null_input_returns_none(self):
+ dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
+ self.assertEqual(None, self.subject.deserialize_weights(dummy_model_state.tostring(), None))
+ self.assertEqual(None, self.subject.deserialize_weights(None, [1,2,3]))
+ self.assertEqual(None, self.subject.deserialize_weights(None, None))
+
+ def test_deserialize_weights_valid_input_returns_not_none(self):
+ dummy_model_state = np.array([0,1,2,3,4,5], dtype=np.float32)
+ dummy_model_shape = [(2, 1, 1, 1), (1,)]
+ res = self.subject.deserialize_weights(dummy_model_state.tostring(), dummy_model_shape)
+ self.assertEqual(0, res[0])
+ self.assertEqual(1, res[1])
+ self.assertEqual(2, res[2])
+ self.assertEqual([[[[3.0]]], [[[4.0]]]], res[3][0].tolist())
+ self.assertEqual([5], res[3][1].tolist())
+
+ def test_deserialize_weights_invalid_input_fails(self):
+ # pass an invalid state with missing model weights
+ invalid_model_state = np.array([0,1,2], dtype=np.float32)
+ dummy_model_shape = [(2, 1, 1, 1), (1,)]
+
+ # we expect keras failure(ValueError) because we cannot reshape
+ # model weights of size 0 into shape (2,2,3,1)
+ with self.assertRaises(ValueError):
+ self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
+
+ invalid_model_state = np.array([0,1,2,3,4], dtype=np.float32)
+ dummy_model_shape = [(2, 2, 3, 1), (1,)]
+ # we expect keras failure(ValueError) because we cannot reshape
+ # model weights of size 2 into shape (2,2,3,1)
+ with self.assertRaises(ValueError):
+ self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
+
+ def test_deserialize_iteration_state_none_input_returns_none(self):
+ self.assertEqual(None, self.subject.deserialize_iteration_state(None))
+
+ def test_deserialize_iteration_state_returns_valid_output(self):
+ dummy_iteration_state = np.array([0,1,2,3,4,5], dtype=np.float32)
+ res = self.subject.deserialize_iteration_state(
+ dummy_iteration_state.tostring())
+ self.assertEqual(0, res[0])
+ self.assertEqual(1, res[1])
+ self.assertEqual(res[2],
+ np.array([0,0,0,3,4,5], dtype=np.float32).tostring())
+
+ def test_serialize_weights_none_weights_returns_none(self):
+ res = self.subject.serialize_weights(0,1,2,None)
+ self.assertEqual(None , res)
+
+ def test_serialize_weights_valid_output(self):
+ res = self.subject.serialize_weights(0,1,2,[np.array([1,3]),
+ np.array([4,5])])
+ self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
+ res)
+
+ def test_serialize_weights_merge_none_weights_returns_none(self):
+ res = self.subject.serialize_weights_merge(0,1,2,None)
+ self.assertEqual(None , res)
+
+ def test_serialize_weights_merge_valid_output(self):
+ res = self.subject.serialize_weights_merge(0,1,2,np.array([1,3,4,5]))
+ self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
+ res)
class MadlibKerasPredictTestCase(unittest.TestCase):
def setUp(self):
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
deleted file mode 100644
index 8264800..0000000
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_serializer.py_in
+++ /dev/null
@@ -1,207 +0,0 @@
-# coding=utf-8
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import sys
-import numpy as np
-from os import path
-# Add convex module to the pythonpath.
-sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
-sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
-
-from keras.models import *
-from keras.layers import *
-
-import unittest
-from mock import *
-import plpy_mock as plpy
-
-m4_changequote(`<!', `!>')
-
-class MadlibSerializerTestCase(unittest.TestCase):
- def setUp(self):
- self.plpy_mock = Mock(spec='error')
- patches = {
- 'plpy': plpy
- }
-
- self.plpy_mock_execute = MagicMock()
- plpy.execute = self.plpy_mock_execute
-
- self.module_patcher = patch.dict('sys.modules', patches)
- self.module_patcher.start()
- import madlib_keras_serializer
- self.subject = madlib_keras_serializer
-
- self.model = Sequential()
- self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
- input_shape=(1,1,1,), padding='same'))
- self.model.add(Flatten())
-
- self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
- self.fit_params = "'batch_size'=1, 'epochs'=1"
- self.model_weights = [3,4,5,6]
- self.loss = 1.3
- self.accuracy = 0.34
- self.all_seg_ids = [0,1,2]
- self.total_buffers_per_seg = [3,3,3]
-
- def tearDown(self):
- self.module_patcher.stop()
-
- def test_deserialize_weights_merge_null_state_returns_none(self):
- self.assertEqual(None, self.subject.deserialize_weights_merge(None))
-
- def test_deserialize_weights_merge_returns_not_none(self):
- dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
- res = self.subject.deserialize_weights_merge(dummy_model_state.tostring())
- self.assertEqual(0, res[0])
- self.assertEqual(1, res[1])
- self.assertEqual(2, res[2])
- self.assertEqual([3,4,5,6], res[3].tolist())
-
- def test_deserialize_weights_null_input_returns_none(self):
- dummy_model_state = np.array([0,1,2,3,4,5,6], dtype=np.float32)
- self.assertEqual(None, self.subject.deserialize_weights(dummy_model_state.tostring(), None))
- self.assertEqual(None, self.subject.deserialize_weights(None, [1,2,3]))
- self.assertEqual(None, self.subject.deserialize_weights(None, None))
-
- def test_deserialize_weights_valid_input_returns_not_none(self):
- dummy_model_state = np.array([0,1,2,3,4,5], dtype=np.float32)
- dummy_model_shape = [(2, 1, 1, 1), (1,)]
- res = self.subject.deserialize_weights(dummy_model_state.tostring(), dummy_model_shape)
- self.assertEqual(0, res[0])
- self.assertEqual(1, res[1])
- self.assertEqual(2, res[2])
- self.assertEqual([[[[3.0]]], [[[4.0]]]], res[3][0].tolist())
- self.assertEqual([5], res[3][1].tolist())
-
- def test_deserialize_weights_invalid_input_fails(self):
- # pass an invalid state with missing model weights
- invalid_model_state = np.array([0,1,2], dtype=np.float32)
- dummy_model_shape = [(2, 1, 1, 1), (1,)]
-
- # we expect keras failure(ValueError) because we cannot reshape
- # model weights of size 0 into shape (2,2,3,1)
- with self.assertRaises(ValueError):
- self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
-
- invalid_model_state = np.array([0,1,2,3,4], dtype=np.float32)
- dummy_model_shape = [(2, 2, 3, 1), (1,)]
- # we expect keras failure(ValueError) because we cannot reshape
- # model weights of size 2 into shape (2,2,3,1)
- with self.assertRaises(ValueError):
- self.subject.deserialize_weights(invalid_model_state.tostring(), dummy_model_shape)
-
- def test_deserialize_iteration_state_none_input_returns_none(self):
- self.assertEqual(None, self.subject.deserialize_iteration_state(None))
-
- def test_deserialize_iteration_state_returns_valid_output(self):
- dummy_iteration_state = np.array([0,1,2,3,4,5], dtype=np.float32)
- res = self.subject.deserialize_iteration_state(
- dummy_iteration_state.tostring())
- self.assertEqual(0, res[0])
- self.assertEqual(1, res[1])
- self.assertEqual(res[2],
- np.array([0,0,0,3,4,5], dtype=np.float32).tostring())
-
- def test_serialize_weights_none_weights_returns_none(self):
- res = self.subject.serialize_weights(0,1,2,None)
- self.assertEqual(None , res)
-
- def test_serialize_weights_valid_output(self):
- res = self.subject.serialize_weights(0,1,2,[np.array([1,3]),
- np.array([4,5])])
- self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
- res)
-
- def test_serialize_weights_merge_none_weights_returns_none(self):
- res = self.subject.serialize_weights_merge(0,1,2,None)
- self.assertEqual(None , res)
-
- def test_serialize_weights_merge_valid_output(self):
- res = self.subject.serialize_weights_merge(0,1,2,np.array([1,3,4,5]))
- self.assertEqual(np.array([0,1,2,1,3,4,5], dtype=np.float32).tostring(),
- res)
-
-class MadlibKerasHelperTestCase(unittest.TestCase):
- def setUp(self):
- self.plpy_mock = Mock(spec='error')
- patches = {
- 'plpy': plpy
- }
-
- self.plpy_mock_execute = MagicMock()
- plpy.execute = self.plpy_mock_execute
-
- self.module_patcher = patch.dict('sys.modules', patches)
- self.module_patcher.start()
- from madlib_keras_helper import FitInputValidator
- self.subject = FitInputValidator
-
- self.model = Sequential()
- self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu',
- input_shape=(1,1,1,), padding='same'))
- self.model.add(Flatten())
-
- self.compile_params = "'optimizer'=SGD(lr=0.01, decay=1e-6, nesterov=True), 'loss'='categorical_crossentropy', 'metrics'=['accuracy']"
- self.fit_params = "'batch_size'=1, 'epochs'=1"
- self.model_weights = [3,4,5,6]
- self.loss = 1.3
- self.accuracy = 0.34
- self.all_seg_ids = [0,1,2]
- self.total_buffers_per_seg = [3,3,3]
-
- def tearDown(self):
- self.module_patcher.stop()
-
- def test_validate_input_shapes_shapes_do_not_match(self):
- self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32}]
- self.subject._validate_input_args = Mock()
- input_validator_obj = self.subject('foo',
- 'foo_valid',
- 'model',
- 'model_arch_table',
- 'dependent_varname',
- 'independent_varname',
- 1)
- with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
-
- self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': 32, 'n_2': 32}]
- with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 2)
-
- self.plpy_mock_execute.return_value = [{'n_0': 3, 'n_1': None, 'n_2': None}]
- with self.assertRaises(plpy.PLPYException):
- input_validator_obj.validate_input_shapes('dummy_tbl', [3,32], 2)
-
- def test_validate_input_shapes_shapes_match(self):
- self.plpy_mock_execute.return_value = [{'n_0': 32, 'n_1': 32, 'n_2': 3}]
- self.subject._validate_input_args = Mock()
- input_validator_obj = self.subject('foo',
- 'foo_valid',
- 'model',
- 'model_arch_table',
- 'dependent_varname',
- 'independent_varname',
- 1)
- input_validator_obj.validate_input_shapes('dummy_tbl', [32,32,3], 1)
-
-if __name__ == '__main__':
- unittest.main()
[madlib] 02/05: DL: Use FORMAT class for getting model_arch col
names
Posted by nk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 8570aa03f9f13e63b9bf7d078659b9d16c964744
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Tue Apr 9 17:51:04 2019 -0700
DL: Use FORMAT class for getting model_arch col names
JIRA: MADLIB-1304
Use FORMAT class for getting model_arch col names (previously they were
hardcoded, which caused a discrepancy between minibatch_preprocessor_dl()
and madlib_keras_fit()).
Closes #367
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
.../deep_learning/keras_model_arch_table.py_in | 19 ++++++++++---------
.../postgres/modules/deep_learning/madlib_keras.py_in | 14 ++++++++------
.../modules/deep_learning/madlib_keras_predict.py_in | 12 +++++++-----
.../modules/deep_learning/test/madlib_keras.sql_in | 1 -
4 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
index a0521ee..8ed3ad6 100644
--- a/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
+++ b/src/ports/postgres/modules/deep_learning/keras_model_arch_table.py_in
@@ -55,7 +55,7 @@ class Format:
"""
col_names = ('model_id', 'model_arch', 'model_weights', '__internal_madlib_id__')
col_types = ('SERIAL PRIMARY KEY', 'JSON', 'DOUBLE PRECISION[]', 'TEXT')
- (model_id, model_arch, model_weights, __internal_madlib_id__) = col_names
+ (MODEL_ID, MODEL_ARCH, MODEL_WEIGHTS, __INTERNAL_MADLIB_ID__) = col_names
@MinWarning("warning")
def _execute(sql,max_rows=0):
@@ -89,18 +89,18 @@ def load_keras_model(schema_madlib, keras_model_arch_table,
SELECT {model_id_col}, {model_arch_col}
FROM {model_arch_table} WHERE {internal_id_col} = '{unique_str}'
""".format(model_arch_table=model_arch_table,
- model_arch_col=Format.model_arch,
+ model_arch_col=Format.MODEL_ARCH,
unique_str=unique_str,
model_arch=quote_literal(model_arch),
- model_id_col=Format.model_id,
- internal_id_col=Format.__internal_madlib_id__)
+ model_id_col=Format.MODEL_ID,
+ internal_id_col=Format.__INTERNAL_MADLIB_ID__)
res = _execute(sql,1)
- if len(res) != 1 or res[0][Format.model_arch] != model_arch:
+ if len(res) != 1 or res[0][Format.MODEL_ARCH] != model_arch:
raise Exception("Failed to insert new row in {0} table--try again?"
.format(model_arch_table))
plpy.info("Keras Model Arch: Added model id {0} to {1} table".
- format(res[0]['model_id'], model_arch_table))
+ format(res[0][Format.MODEL_ID], model_arch_table))
def delete_keras_model(schema_madlib, keras_model_arch_table,
model_id, **kwargs):
@@ -113,8 +113,9 @@ def delete_keras_model(schema_madlib, keras_model_arch_table,
" missing columns: {1}".format(model_arch_table, missing_cols))
sql = """
- DELETE FROM {model_arch_table} WHERE model_id={model_id}
- """.format(model_arch_table=model_arch_table, model_id=model_id)
+ DELETE FROM {model_arch_table} WHERE {model_id_col}={model_id}
+ """.format(model_arch_table=model_arch_table, model_id_col=Format.MODEL_ID,
+ model_id=model_id)
res = _execute(sql)
if res.nrows() > 0:
@@ -123,7 +124,7 @@ def delete_keras_model(schema_madlib, keras_model_arch_table,
else:
plpy.error("Keras Model Arch: Model id {0} not found".format(model_id))
- sql = "SELECT model_id FROM {0}".format(model_arch_table)
+ sql = "SELECT {0} FROM {1}".format(Format.MODEL_ID, model_arch_table)
res = _execute(sql)
if not res:
plpy.info("Keras Model Arch: Dropping empty keras model arch "\
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index c3f36ab..b883cac 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -41,6 +41,7 @@ from madlib_keras_helper import DEPENDENT_VARTYPE
from madlib_keras_helper import NORMALIZING_CONST_COLNAME
from madlib_keras_helper import FitInputValidator
from madlib_keras_wrapper import *
+from keras_model_arch_table import Format
from utilities.model_arch_info import get_input_shape
from utilities.model_arch_info import get_num_classes
@@ -63,22 +64,23 @@ def fit(schema_madlib, source_table, model, dependent_varname,
use_gpu = bool(use_gpu)
# Get the serialized master model
- #TODO fix hardcoding of col names
start_deserialization = time.time()
- model_arch_query = "SELECT model_arch, model_weights FROM {0} WHERE "\
- "id = {1}".format(model_arch_table, model_arch_id)
+ model_arch_query = "SELECT {0}, {1} FROM {2} WHERE {3} = {4}".format(
+ Format.MODEL_ARCH, Format.MODEL_WEIGHTS,
+ model_arch_table, Format.MODEL_ID,
+ model_arch_id)
query_result = plpy.execute(model_arch_query)
if not query_result:
plpy.error("no model arch found in table {0} with id {1}".format(
model_arch_table, model_arch_id))
query_result = query_result[0]
- model_arch = query_result['model_arch']
+ model_arch = query_result[Format.MODEL_ARCH]
input_shape = get_input_shape(model_arch)
num_classes = get_num_classes(model_arch)
fit_validator.validate_input_shapes(source_table, input_shape, 2)
if validation_table:
fit_validator.validate_input_shapes(validation_table, input_shape, 1)
- model_weights_serialized = query_result['model_weights']
+ model_weights_serialized = query_result[Format.MODEL_WEIGHTS]
# Convert model from json and initialize weights
master_model = model_from_json(model_arch)
@@ -510,7 +512,7 @@ def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
plpy.error("no model arch found in table {0} with id {1}".format(
model_arch_table, model_arch_id))
query_result = query_result[0]
- model_arch = query_result['model_arch']
+ model_arch = query_result[Format.MODEL_ARCH]
compile_params = "$madlib$" + compile_params + "$madlib$"
loss_acc = get_loss_acc_from_keras_eval(schema_madlib, test_table, dependent_varname,
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index d89d703..1475a0f 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -33,6 +33,7 @@ from utilities.validate_args import get_col_value_and_type
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import output_tbl_valid
from madlib_keras_helper import CLASS_VALUES_COLNAME
+from keras_model_arch_table import Format
from madlib_keras_wrapper import compile_and_set_weights
import madlib_keras_serializer
@@ -56,16 +57,17 @@ def predict(schema_madlib, model_table, test_table, id_col,
model_data = plpy.execute(model_data_query)[0]['model_data']
model_arch_query = """
- SELECT model_arch, model_weights
- FROM {0}
- WHERE id = {1}
- """.format(model_arch_table, model_arch_id)
+ SELECT {0}, {1}
+ FROM {2}
+ WHERE {3} = {4}
+ """.format(Format.MODEL_ARCH, Format.MODEL_WEIGHTS,model_arch_table,
+ Format.MODEL_ID, model_arch_id)
query_result = plpy.execute(model_arch_query)
if not query_result or len(query_result) == 0:
plpy.error("{0}: No model arch found in table {1} with id {2}".format(
MODULE_NAME, model_arch_table, model_arch_id))
query_result = query_result[0]
- model_arch = query_result['model_arch']
+ model_arch = query_result[Format.MODEL_ARCH]
input_shape = get_input_shape(model_arch)
compile_params = "$madlib$" + compile_params + "$madlib$"
model_summary_table = add_postfix(model_table, "_summary")
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index ceb3d67..7902db4 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -63,7 +63,6 @@ SELECT load_keras_model('model_arch',
{"class_name": "Dense", "config": {"kernel_initializer": {"class_name": "VarianceScaling", "config": {"distribution": "uniform", "scale": 1.0, "seed": null, "mode": "fan_avg"}}, "name": "dense_1", "kernel_constraint": null, "bias_regularizer": null, "bias_constraint": null, "activation": "softmax", "trainable": true, "kernel_regularizer": null, "bias_initializer":
{"class_name": "Zeros", "config": {}}, "units": 2, "use_bias": true, "activity_regularizer": null}
}], "backend": "tensorflow"}$$);
-ALTER TABLE model_arch RENAME model_id TO id;
-- Please do not break up the compile_params string
-- It might break the assertion
[madlib] 04/05: DL: Remove evaluate function
Posted by nk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 63847d721c8c12c6b255414a2dc894392cde5617
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Apr 10 16:25:14 2019 -0700
DL: Remove evaluate function
JIRA: MADLIB-1304
The `evaluate` UDF keeps all the validation table data in master's
memory and then calls keras.eval on it. We can instead run keras.eval on
all the segment hosts so that we don't have to keep all data in one
node's memory. We already have a UDF named `evaluate1` that is
implemented with this logic which will be renamed to `evaluate` in a
future PR.
This commit removes the evaluate function from the sql as
well as the python file.
Closes #367
Co-authored-by: Jingyi Mei <jm...@pivotal.io>
---
.../modules/deep_learning/madlib_keras.py_in | 52 ----------------------
.../modules/deep_learning/madlib_keras.sql_in | 27 -----------
2 files changed, 79 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 83ca5a4..ea77d2e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -439,58 +439,6 @@ def fit_merge(state1, state2, **kwargs):
def fit_final(state, **kwargs):
return state
-def evaluate(schema_madlib, model_table, source_table, id_col,
- model_arch_table, model_arch_id, dependent_varname,
- independent_varname, compile_params, output_table,
- **kwargs):
- module_name = 'madlib_keras_evaluate'
- input_tbl_valid(source_table, module_name)
- input_tbl_valid(model_arch_table, module_name)
- output_tbl_valid(output_table, module_name)
-
- # _validate_input_args(test_table, model_arch_table, output_table)
- device_name = '/cpu:0'
- os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
-
- model_data_query = "SELECT model_data from {0}".format(model_table)
- model_data = plpy.execute(model_data_query)[0]['model_data']
-
- model_arch_query = "SELECT model_arch, model_weights FROM {0} " \
- "WHERE id = {1}".format(model_arch_table, model_arch_id)
- query_result = plpy.execute(model_arch_query)
-
- query_result = query_result[0]
- model_arch = query_result['model_arch']
- input_shape = get_input_shape(model_arch)
- model = model_from_json(model_arch)
-
- model_shapes = []
- for weight_arr in model.get_weights():
- model_shapes.append(weight_arr.shape)
- _, updated_weights = madlib_keras_serializer.deserialize_weights(
- model_data, model_shapes)
- model.set_weights(updated_weights)
- optimizers = get_optimizers()
- (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
- with K.tf.device(device_name):
- model.compile(optimizer=optimizers[opt_name](**final_args),
- loss=compile_dict['loss'],
- metrics=compile_dict['metrics'])
-
- input_shape = map(int, input_shape)
- x_validation, y_validation = get_data_as_np_array(source_table,
- dependent_varname,
- independent_varname)
-
- plpy.info('X shape : {0}'.format(x_validation.shape))
- plpy.info('Y shape : {0}'.format(y_validation.shape))
-
- with K.tf.device(device_name):
- evaluate_result = model.evaluate(x_validation, y_validation)
-
- plpy.info('evaluate result is {}'.format(evaluate_result))
-
-
def evaluate1(schema_madlib, model_table, test_table, id_col, model_arch_table,
model_arch_id, dependent_varname, independent_varname,
compile_params, output_table, **kwargs):
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index 9526c91..37afad9 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -205,33 +205,6 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_predict(
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-
-CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate(
- model_table VARCHAR,
- test_table VARCHAR,
- id_col VARCHAR,
- model_arch_table VARCHAR,
- model_arch_id INTEGER,
- dependent_varname VARCHAR,
- independent_varname VARCHAR,
- compile_params VARCHAR,
- output_table VARCHAR
-) RETURNS VOID AS $$
- PythonFunctionBodyOnly(`deep_learning', `madlib_keras')
- with AOControl(False):
- madlib_keras.evaluate(schema_madlib,
- model_table,
- test_table,
- id_col,
- model_arch_table,
- model_arch_id,
- dependent_varname,
- independent_varname,
- compile_params,
- output_table)
-$$ LANGUAGE plpythonu VOLATILE
-m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_evaluate1(
model_table VARCHAR,
test_table VARCHAR,