You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2019/12/17 18:20:55 UTC
[madlib] 01/02: DL: Add asymmetric cluster check for predict
This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit ea570674c725b0226d7396ed1d3f1478dd701d95
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Mon Dec 2 20:40:03 2019 -0500
DL: Add asymmetric cluster check for predict
JIRA: MADLIB-1394 #close
This commit updates madlib_keras_predict and its associated functions to
take a use_gpus parameter, like the rest of the keras functions.
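The current implementation does not support asymmetric GPU configurations (segments with differing accessible GPU counts); predict errors out if use_gpus is set on such a cluster.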
Closes #463
---
.../modules/deep_learning/madlib_keras.py_in | 8 +--
.../modules/deep_learning/madlib_keras.sql_in | 59 +++++++++-------------
.../madlib_keras_fit_multiple_model.py_in | 3 +-
.../deep_learning/madlib_keras_helper.py_in | 24 +++------
.../deep_learning/madlib_keras_predict.py_in | 45 ++++++++++-------
.../deep_learning/madlib_keras_wrapper.py_in | 26 ++--------
.../test/madlib_keras_model_averaging_e2e.sql_in | 4 +-
.../deep_learning/test/madlib_keras_predict.sql_in | 28 +++++-----
.../test/madlib_keras_predict_byom.sql_in | 8 +--
.../test/unit_tests/test_madlib_keras.py_in | 35 ++++++-------
10 files changed, 106 insertions(+), 134 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 9ed02d1..7502a6a 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -342,9 +342,9 @@ def get_initial_weights(model_table, model_arch, serialized_weights, warm_start,
@param warm_start: Boolean flag indicating warm start or not.
"""
if is_platform_pg():
- _ = get_device_name_and_set_cuda_env(use_gpus, accessible_gpus_for_seg[0], None)
+ _ = get_device_name_and_set_cuda_env(accessible_gpus_for_seg[0], None)
else:
- _ = get_device_name_and_set_cuda_env(False, 0, None)
+ _ = get_device_name_and_set_cuda_env(0, None)
if warm_start:
serialized_weights = plpy.execute("""
@@ -478,7 +478,7 @@ def fit_transition(state, dependent_var, independent_var, dependent_var_shape,
if not independent_var or not dependent_var:
return state
SD = kwargs['SD']
- device_name = get_device_name_and_set_cuda_env(use_gpus, accessible_gpus_for_seg[current_seg_id], current_seg_id)
+ device_name = get_device_name_and_set_cuda_env(accessible_gpus_for_seg[current_seg_id], current_seg_id)
segment_model, sess = get_init_model_and_sess(SD, device_name,
accessible_gpus_for_seg[current_seg_id],
@@ -702,7 +702,7 @@ def internal_keras_eval_transition(state, dependent_var, independent_var,
use_gpus, accessible_gpus_for_seg,
is_final_iteration, **kwargs):
SD = kwargs['SD']
- device_name = get_device_name_and_set_cuda_env(use_gpus, accessible_gpus_for_seg[current_seg_id], current_seg_id)
+ device_name = get_device_name_and_set_cuda_env(accessible_gpus_for_seg[current_seg_id], current_seg_id)
agg_loss, agg_metric, agg_image_count = state
# This transition function is common to evaluate as well as the fit functions
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index be5449a..7de95bc 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -533,7 +533,7 @@ madlib_keras_predict(
independent_varname,
output_table,
pred_type,
- gpus_per_host,
+ use_gpus,
mst_key
)
</pre>
@@ -599,22 +599,17 @@ madlib_keras_predict(
and 'prob' gives the probability value for each class.
</DD>
- <DT>gpus_per_host (optional)</DT>
- <DD>INTEGER, default: 0 (i.e., CPU).
- Number of GPUs per segment host to be used
- for training the neural network.
- For example, if you specify 4 for this parameter
- and your database cluster is set up to have 4
- segments per segment host, it means that each
- segment will have a dedicated GPU.
- A value of 0 means that CPUs, not GPUs, will
- be used for training.
+ <DT>use_gpus(optional)</DT>
+ <DD>BOOLEAN, default: FALSE (i.e., CPU).
+ Flag to enable GPU support for training neural network.
+ The number of GPUs to use is determined by the parameters
+ passed to the preprocessor.
@note
We have seen some memory related issues when segments
share GPU resources.
- For example, if you specify 1 for this parameter
- and your database cluster is set up to have 4
+ For example, if you provide 1 GPU and your
+ database cluster is set up to have 4
segments per segment host, it means that all 4
segments on a segment host will share the same
GPU. The current recommended
@@ -642,7 +637,7 @@ madlib_keras_predict_byom(
independent_varname,
output_table,
pred_type,
- gpus_per_host,
+ use_gpus,
class_values,
normalizing_const
)
@@ -714,22 +709,17 @@ madlib_keras_predict_byom(
gives the actual prediction and 'prob' gives the probability value for each class.
</DD>
- <DT>gpus_per_host (optional)</DT>
- <DD>INTEGER, default: 0 (i.e., CPU).
- Number of GPUs per segment host to be used
- for training the neural network.
- For example, if you specify 4 for this parameter
- and your database cluster is set up to have 4
- segments per segment host, it means that each
- segment will have a dedicated GPU.
- A value of 0 means that CPUs, not GPUs, will
- be used for training.
+ <DT>use_gpus(optional)</DT>
+ <DD>BOOLEAN, default: FALSE (i.e., CPU).
+ Flag to enable GPU support for training neural network.
+ The number of GPUs to use is determined by the parameters
+ passed to the preprocessor.
@note
We have seen some memory related issues when segments
share GPU resources.
- For example, if you specify 1 for this parameter
- and your database cluster is set up to have 4
+ For example, if you provide 1 GPU and your
+ database cluster is set up to have 4
segments per segment host, it means that all 4
segments on a segment host will share the same
GPU. The current recommended
@@ -1805,7 +1795,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
independent_varname VARCHAR,
output_table VARCHAR,
pred_type VARCHAR,
- gpus_per_host INTEGER,
+ use_gpus BOOLEAN,
mst_key INTEGER
) RETURNS VOID AS $$
PythonFunctionBodyOnly(`deep_learning', `madlib_keras_predict')
@@ -1817,7 +1807,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
independent_varname,
output_table,
pred_type,
- gpus_per_host,
+ use_gpus,
mst_key)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -1829,7 +1819,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
independent_varname VARCHAR,
output_table VARCHAR,
pred_type VARCHAR,
- gpus_per_host INTEGER
+ use_gpus BOOLEAN
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.madlib_keras_predict($1, $2, $3, $4, $5, $6, $7, NULL);
$$ LANGUAGE sql VOLATILE
@@ -1843,7 +1833,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
output_table VARCHAR,
pred_type VARCHAR
) RETURNS VOID AS $$
- SELECT MADLIB_SCHEMA.madlib_keras_predict($1, $2, $3, $4, $5, $6, 0, NULL);
+ SELECT MADLIB_SCHEMA.madlib_keras_predict($1, $2, $3, $4, $5, $6, FALSE, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
@@ -1854,7 +1844,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict(
independent_varname VARCHAR,
output_table VARCHAR
) RETURNS VOID AS $$
- SELECT MADLIB_SCHEMA.madlib_keras_predict($1, $2, $3, $4, $5, NULL, 0, NULL);
+ SELECT MADLIB_SCHEMA.madlib_keras_predict($1, $2, $3, $4, $5, NULL, FALSE, NULL);
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
@@ -1867,6 +1857,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_keras_predict(
current_seg_id INTEGER,
seg_ids INTEGER[],
images_per_seg INTEGER[],
+ use_gpus BOOLEAN,
gpus_per_host INTEGER,
segments_per_host INTEGER
) RETURNS DOUBLE PRECISION[] AS $$
@@ -1884,7 +1875,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom(
independent_varname VARCHAR,
output_table VARCHAR,
pred_type VARCHAR,
- gpus_per_host INTEGER,
+ use_gpus BOOLEAN,
class_values TEXT[],
normalizing_const DOUBLE PRECISION
) RETURNS VOID AS $$
@@ -1902,7 +1893,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom(
independent_varname VARCHAR,
output_table VARCHAR,
pred_type VARCHAR,
- gpus_per_host INTEGER,
+ use_gpus BOOLEAN,
class_values TEXT[]
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL);
@@ -1918,7 +1909,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.madlib_keras_predict_byom(
independent_varname VARCHAR,
output_table VARCHAR,
pred_type VARCHAR,
- gpus_per_host INTEGER
+ use_gpus BOOLEAN
) RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.madlib_keras_predict_byom($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL);
$$ LANGUAGE sql VOLATILE
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index f889980..5ce555a 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -214,6 +214,7 @@ class FitMultipleModel():
mst_metric = self.valid_mst_metric
seg_ids = self.dist_key_mapping_valid
images_per_seg = self.images_per_seg_valid
+ self.info_str += "\n\tValidation set after iteration {0}:".format(epoch)
for mst in self.msts:
weights = query_weights(self.model_output_table, self.model_weights_col,
self.mst_key_col, mst[self.mst_key_col])
@@ -226,7 +227,7 @@ class FitMultipleModel():
self.use_gpus,
self.accessible_gpus_for_seg,
seg_ids,
- self.images_per_seg_train,
+ images_per_seg,
[], [], epoch, True)
mst_metric_eval_time[mst[self.mst_key_col]] \
.append(metric_eval_time)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
index 3dcc572..5f91255 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_helper.py_in
@@ -108,18 +108,6 @@ def strip_trailing_nulls_from_class_values(class_values):
class_values = class_values[:num_of_valid_class_values]
return class_values
-def get_image_count_per_seg_from_array_predict(current_seg_id, seg_ids, images_per_seg):
- """
- Get the image count from the array containing all the images
- per segment. Based on the platform, we find the index of the current segment.
- This function is only called from inside the transition function.
- """
- if is_platform_pg():
- total_images = images_per_seg[0]
- else:
- total_images = images_per_seg[seg_ids.index(current_seg_id)]
- return total_images
-
def get_image_count_per_seg_from_array(current_seg_id, images_per_seg):
"""
Get the image count from the array containing all the images
@@ -205,12 +193,12 @@ def get_image_count_per_seg_for_non_minibatched_data_from_db(table_name):
else:
# Compute total buffers on each segment
images_per_seg = plpy.execute(
- """ SELECT gp_segment_id, count(*) AS images_per_seg
- FROM {0}
- GROUP BY gp_segment_id
- """.format(table_name))
- seg_ids = [int(image["gp_segment_id"]) for image in images_per_seg]
- gp_segment_id_col = '{0}.gp_segment_id'.format(table_name)
+ """ SELECT {0}, count(*) AS images_per_seg
+ FROM {1}
+ GROUP BY {0}
+ """.format(GP_SEGMENT_ID_COLNAME, table_name))
+ seg_ids = [int(image[GP_SEGMENT_ID_COLNAME]) for image in images_per_seg]
+ gp_segment_id_col = '{0}.{1}'.format(table_name,GP_SEGMENT_ID_COLNAME)
images_per_seg = [int(image["images_per_seg"]) for image in images_per_seg]
return gp_segment_id_col, seg_ids, images_per_seg
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index cf06e45..16ae7b1 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -41,7 +41,7 @@ from madlib_keras_wrapper import *
class BasePredict():
def __init__(self, schema_madlib, table_to_validate, test_table, id_col,
- independent_varname, output_table, pred_type, gpus_per_host):
+ independent_varname, output_table, pred_type, use_gpus, module_name):
self.schema_madlib = schema_madlib
self.table_to_validate = table_to_validate
self.test_table = test_table
@@ -49,14 +49,23 @@ class BasePredict():
self.independent_varname = independent_varname
self.output_table = output_table
self.pred_type = pred_type
- self.gpus_per_host = gpus_per_host
- self._set_default_gpus_pred_type()
+ self.module_name = module_name
+
+ self.segments_per_host = get_segments_per_host()
+ self.use_gpus = use_gpus if use_gpus else False
+ if self.use_gpus:
+ accessible_gpus_for_seg = get_accessible_gpus_for_seg(schema_madlib, self.segments_per_host, self.module_name)
+ _assert(len(set(accessible_gpus_for_seg)) == 1,
+ '{0}: Asymmetric gpu configurations are not supported'.format(self.module_name))
+ self.gpus_per_host = accessible_gpus_for_seg[0]
+ else:
+ self.gpus_per_host = 0
+
+ self._set_default_pred_type()
- def _set_default_gpus_pred_type(self):
+ def _set_default_pred_type(self):
self.pred_type = 'response' if not self.pred_type else self.pred_type
self.is_response = True if self.pred_type == 'response' else False
- self.gpus_per_host = 0 if self.gpus_per_host is None else self.gpus_per_host
-
def call_internal_keras(self):
if self.is_response:
@@ -104,6 +113,7 @@ class BasePredict():
{gp_segment_id_col},
ARRAY{seg_ids_test},
ARRAY{images_per_seg_test},
+ {self.use_gpus},
{self.gpus_per_host},
{segments_per_host})
) AS {intermediate_col}
@@ -138,7 +148,7 @@ class BasePredict():
class Predict(BasePredict):
def __init__(self, schema_madlib, model_table,
test_table, id_col, independent_varname,
- output_table, pred_type, gpus_per_host,
+ output_table, pred_type, use_gpus,
mst_key, **kwargs):
self.module_name = 'madlib_keras_predict'
@@ -151,7 +161,7 @@ class Predict(BasePredict):
BasePredict.__init__(self, schema_madlib, model_table, test_table,
id_col, independent_varname,
output_table, pred_type,
- gpus_per_host)
+ use_gpus, self.module_name)
param_proc = PredictParamsProcessor(self.model_table, self.module_name, self.mst_key)
if self.is_mult_model:
self.temp_summary_view = param_proc.model_summary_table
@@ -194,7 +204,7 @@ class Predict(BasePredict):
class PredictBYOM(BasePredict):
def __init__(self, schema_madlib, model_arch_table, model_id,
test_table, id_col, independent_varname, output_table,
- pred_type, gpus_per_host, class_values, normalizing_const,
+ pred_type, use_gpus, class_values, normalizing_const,
**kwargs):
self.module_name='madlib_keras_predict_byom'
@@ -205,13 +215,13 @@ class PredictBYOM(BasePredict):
self.dependent_varname = 'dependent_var'
BasePredict.__init__(self, schema_madlib, model_arch_table,
test_table, id_col, independent_varname,
- output_table, pred_type, gpus_per_host)
+ output_table, pred_type, use_gpus, self.module_name)
if self.is_response:
self.dependent_vartype = 'text'
else:
self.dependent_vartype = 'double precision'
## Set default values for norm const and class_values
- # gpus_per_host and pred_type are defaulted in base_predict's init
+ # use_gpus and pred_type are defaulted in base_predict's init
self.normalizing_const = normalizing_const
if self.normalizing_const is None:
self.normalizing_const = DEFAULT_NORMALIZING_CONST
@@ -244,14 +254,13 @@ class PredictBYOM(BasePredict):
def internal_keras_predict(independent_var, model_architecture, model_weights,
is_response, normalizing_const, current_seg_id, seg_ids,
- images_per_seg, gpus_per_host, segments_per_host,
+ images_per_seg, use_gpus, gpus_per_host, segments_per_host,
**kwargs):
SD = kwargs['SD']
model_key = 'segment_model_predict'
row_count_key = 'row_count'
try:
- device_name = get_device_name_and_set_cuda_env_predict(gpus_per_host,
- current_seg_id)
+ device_name = get_device_name_and_set_cuda_env( gpus_per_host, current_seg_id)
if model_key not in SD:
set_keras_session(device_name, gpus_per_host, segments_per_host)
model = model_from_json(model_architecture)
@@ -284,7 +293,7 @@ def internal_keras_predict(independent_var, model_architecture, model_weights,
# and not mini-batched, this list contains exactly one list in it,
# so return back the first list in probs.
result = probs[0]
- total_images = get_image_count_per_seg_from_array_predict(current_seg_id, seg_ids,
+ total_images = get_image_count_per_seg_from_array(seg_ids.index(current_seg_id),
images_per_seg)
if SD[row_count_key] == total_images:
@@ -335,8 +344,7 @@ For more details on function usage:
variables in the test table
output_table, -- Name of the output table
pred_type, -- The type of the desired output
- gpus_per_host, -- Number of GPUs per segment host to
- be used for training
+ use_gpus, -- Flag for enabling GPU support
mst_key -- Identifier for the desired model out of multimodel
training output
)
@@ -401,8 +409,7 @@ For more details on function usage:
variables in the test table
output_table, -- Name of the output table
pred_type, -- The type of the desired output
- gpus_per_host, -- Number of GPUs per segment host to
- be used for training
+ use_gpus, -- Flag for enabling GPU support
class_values, -- List of class labels that were used while training the
model. If class_values is passed in as NULL, the output
table will have a column named 'prob' which is an array
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 518d20a..541d370 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -58,36 +58,20 @@ def reset_cuda_env(value):
if CUDA_VISIBLE_DEVICES_KEY in os.environ:
del os.environ[CUDA_VISIBLE_DEVICES_KEY]
-def get_device_name_and_set_cuda_env_predict(gpus_per_host, seg):
- if gpus_per_host > 0:
+def get_device_name_and_set_cuda_env(gpu_count, seg):
+
+ if gpu_count > 0:
device_name = '/gpu:0'
if is_platform_pg():
- cuda_visible_dev = ','.join([str(i) for i in range(gpus_per_host)])
+ cuda_visible_dev = ','.join([str(i) for i in range(gpu_count)])
else:
- cuda_visible_dev = str(seg % gpus_per_host)
+ cuda_visible_dev = str(seg % gpu_count)
set_cuda_env(cuda_visible_dev)
else: # cpu only
device_name = '/cpu:0'
set_cuda_env('-1')
return device_name
-def get_device_name_and_set_cuda_env(use_gpus, gpu_count, seg):
-
- if use_gpus:
- if gpu_count > 0:
- device_name = '/gpu:0'
- if is_platform_pg():
- cuda_visible_dev = ','.join([str(i) for i in range(gpu_count)])
- else:
- cuda_visible_dev = str(seg % gpu_count)
- set_cuda_env(cuda_visible_dev)
- else:
- plpy.error("No gpus found on {}".format(seg))
- else: # cpu only
- device_name = '/cpu:0'
- set_cuda_env('-1')
- return device_name
-
def set_keras_session(device_name, gpu_count, segments_per_host):
with K.tf.device(device_name):
session = get_keras_session(device_name, gpu_count, segments_per_host)
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
index 66663f6..d246f8d 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
@@ -68,7 +68,7 @@ SELECT madlib_keras_predict(
'attributes',
'iris_predict',
'prob',
- 0);
+ FALSE);
-- Run Evaluate
DROP TABLE IF EXISTS evaluate_out;
@@ -123,7 +123,7 @@ SELECT madlib_keras_predict(
'attributes',
'iris_predict',
'prob',
- 0);
+ FALSE);
-- Run Evaluate
DROP TABLE IF EXISTS evaluate_out;
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in
index 5d52574..c15942c 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict.sql_in
@@ -54,8 +54,8 @@ SELECT assert(trap_error($TRAP$madlib_keras_predict(
'x',
'cifar10_predict_gpu',
NULL,
- 2);$TRAP$) = 1,
- 'Prediction with gpus_per_host=2 must error out.');
+ TRUE);$TRAP$) = 1,
+ 'Prediction with use_gpus=True must error out.');
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
@@ -65,7 +65,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
NULL,
- 0);
+ FALSE);
-- Validate that prediction output table exists and has correct schema
SELECT assert(UPPER(pg_typeof(id)::TEXT) = 'INTEGER', 'id column should be INTEGER type')
@@ -92,7 +92,7 @@ SELECT assert(trap_error($TRAP$madlib_keras_predict(
'x',
'cifar10_predict',
NULL,
- 0);$TRAP$) = 1,
+ FALSE);$TRAP$) = 1,
'Passing batched image table to predict should error out.');
-- Test with pred_type=prob
@@ -104,7 +104,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
SELECT assert(UPPER(pg_typeof(prob_0)::TEXT) =
'DOUBLE PRECISION', 'column prob_0 should be double precision type')
@@ -185,7 +185,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction columns
-- for prediction type = 'prob' and class_values 'TEXT' with NULL as a valid
@@ -216,7 +216,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'response',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction columns
-- for prediction type = 'response' and class_values 'TEXT' with NULL
@@ -238,7 +238,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'response' and class_value = NULL
@@ -256,7 +256,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'response',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'response' and class_value = NULL
@@ -283,7 +283,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'prob' and class_values 'INT' with NULL
@@ -306,7 +306,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'response',
- 0);
+ FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'response' and class_values 'TEXT' with NULL
@@ -337,7 +337,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
-- Prediction with incorrectly shaped data must error out.
DROP TABLE IF EXISTS cifar10_predict;
@@ -348,7 +348,7 @@ SELECT assert(trap_error($TRAP$madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);$TRAP$) = 1,
+ FALSE);$TRAP$) = 1,
'Input shape is (32, 32, 3) but model was trained with (3, 32, 32). Should have failed.');
-- Test model_arch is retrieved from model data table and not model architecture
@@ -361,7 +361,7 @@ SELECT madlib_keras_predict(
'x',
'cifar10_predict',
'prob',
- 0);
+ FALSE);
-- Test multi model
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict_byom.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict_byom.sql_in
index 10c6087..12dee6f 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict_byom.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_predict_byom.sql_in
@@ -55,7 +55,7 @@ SELECT madlib_keras_predict_byom(
'attributes',
'iris_predict_byom',
'response',
- -1,
+ NULL,
ARRAY['Iris-setosa', 'Iris-versicolor',
'Iris-virginica']
);
@@ -99,7 +99,7 @@ SELECT madlib_keras_predict_byom(
'attributes',
'iris_predict_byom',
'prob',
- -1,
+ NULL,
ARRAY['Iris-setosa', 'Iris-versicolor',
'Iris-virginica'],
1.0
@@ -124,7 +124,7 @@ SELECT madlib_keras_predict_byom(
'attributes',
'iris_predict_byom',
'prob',
- 0,
+ NULL,
NULL
);
SELECT assert(
@@ -134,4 +134,4 @@ FROM iris_predict_byom;
SELECT assert(UPPER(pg_typeof(prob)::TEXT) = 'DOUBLE PRECISION[]',
'Predict byom failure for null class value and prob pred_type. Expeceted prob to
be of type DOUBLE PRECISION[]')
-FROM iris_predict_byom LIMIT 1;
\ No newline at end of file
+FROM iris_predict_byom LIMIT 1;
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 6097286..a8bb629 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -555,7 +555,7 @@ class InternalKerasPredictTestCase(unittest.TestCase):
result = self.subject.internal_keras_predict(
self.independent_var, self.model.to_json(),
serialized_weights, is_response, 255, 0, self.all_seg_ids,
- self.total_images_per_seg, 0, 4, **k)
+ self.total_images_per_seg, False, 0, 4, **k)
self.assertEqual(1, len(result))
self.assertEqual(1, k['SD']['row_count'])
self.assertEqual(True, 'segment_model_predict' in k['SD'])
@@ -568,7 +568,7 @@ class InternalKerasPredictTestCase(unittest.TestCase):
is_response = True
result = self.subject.internal_keras_predict(
self.independent_var, None, None, is_response, 255, 0,
- self.all_seg_ids, self.total_images_per_seg, 0, 4, **k)
+ self.all_seg_ids, self.total_images_per_seg, False, 0, 4, **k)
self.assertEqual(1, len(result))
self.assertEqual(2, k['SD']['row_count'])
self.assertEqual(True, 'segment_model_predict' in k['SD'])
@@ -582,7 +582,7 @@ class InternalKerasPredictTestCase(unittest.TestCase):
is_response = True
result = self.subject.internal_keras_predict(
self.independent_var, None, None, is_response, 255, 0,
- self.all_seg_ids, self.total_images_per_seg, 0, 4, **k)
+ self.all_seg_ids, self.total_images_per_seg, False, 0, 4, **k)
self.assertEqual(1, len(result))
self.assertEqual(False, 'row_count' in k['SD'])
self.assertEqual(False, 'segment_model_predict' in k['SD'])
@@ -592,7 +592,7 @@ class InternalKerasPredictTestCase(unittest.TestCase):
is_response = False
result = self.subject.internal_keras_predict(
self.independent_var, None, None, is_response, 255, 0,
- self.all_seg_ids, self.total_images_per_seg, 0, 4, **k)
+ self.all_seg_ids, self.total_images_per_seg, False, 0, 4, **k)
# we except len(result) to be 3 because we have 3 dense layers in the
# architecture
@@ -613,7 +613,7 @@ class InternalKerasPredictTestCase(unittest.TestCase):
with self.assertRaises(plpy.PLPYException):
self.subject.internal_keras_predict(
self.independent_var, None, None, is_response, normalizing_const,
- 0, self.all_seg_ids, self.total_images_per_seg, 0, 4, **k)
+ 0, self.all_seg_ids, self.total_images_per_seg, False, 0, 4, **k)
self.assertEqual(False, 'row_count' in k['SD'])
self.assertEqual(False, 'segment_model_predict' in k['SD'])
@@ -637,7 +637,7 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
self.model.add(Dense(self.num_classes))
self.pred_type = 'prob'
- self.gpus_per_host = 2
+ self.use_gpus = False
self.class_values = ['foo', 'bar', 'baaz', 'foo2', 'bar2']
self.normalizing_const = 255.0
@@ -653,12 +653,13 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
self.module_patcher.stop()
def test_predictbyom_defaults_1(self):
+ self.module.get_accessible_gpus_for_seg = Mock(return_value = [2,2,2])
res = self.module.PredictBYOM('schema_madlib', 'model_arch_table',
'model_id', 'test_table', 'id_col',
'independent_varname', 'output_table', None,
- None, None, None)
+ True, None, None)
self.assertEqual('response', res.pred_type)
- self.assertEqual(0, res.gpus_per_host)
+ self.assertEqual(2, res.gpus_per_host)
self.assertEqual([0,1,2,3,4], res.class_values)
self.assertEqual(1.0, res.normalizing_const)
self.assertEqual('text', res.dependent_vartype)
@@ -667,10 +668,10 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
res = self.module.PredictBYOM('schema_madlib', 'model_arch_table',
'model_id', 'test_table', 'id_col',
'independent_varname', 'output_table',
- self.pred_type, self.gpus_per_host,
+ self.pred_type, self.use_gpus,
self.class_values, self.normalizing_const)
self.assertEqual('prob', res.pred_type)
- self.assertEqual(2, res.gpus_per_host)
+ self.assertEqual(0, res.gpus_per_host)
self.assertEqual(['foo', 'bar', 'baaz', 'foo2', 'bar2'], res.class_values)
self.assertEqual(255.0, res.normalizing_const)
self.assertEqual('double precision', res.dependent_vartype)
@@ -680,7 +681,7 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
self.module.PredictBYOM('schema_madlib', 'model_arch_table',
'model_id', 'test_table', 'id_col',
'independent_varname', 'output_table',
- 'invalid_pred_type', self.gpus_per_host,
+ 'invalid_pred_type', self.use_gpus,
self.class_values, self.normalizing_const)
self.assertIn('invalid_pred_type', str(error.exception))
@@ -688,7 +689,7 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
self.module.PredictBYOM('schema_madlib', 'model_arch_table',
'model_id', 'test_table', 'id_col',
'independent_varname', 'output_table',
- self.pred_type, self.gpus_per_host,
+ self.pred_type, self.use_gpus,
["foo", "bar", "baaz"], self.normalizing_const)
self.assertIn('class values', str(error.exception).lower())
@@ -696,7 +697,7 @@ class MadlibKerasPredictBYOMTestCase(unittest.TestCase):
self.module.PredictBYOM('schema_madlib', 'model_arch_table',
'model_id', 'test_table', 'id_col',
'independent_varname', 'output_table',
- self.pred_type, self.gpus_per_host,
+ self.pred_type, self.use_gpus,
self.class_values, 0)
self.assertIn('normalizing const', str(error.exception).lower())
@@ -751,12 +752,12 @@ class MadlibKerasWrapperTestCase(unittest.TestCase):
gpus_per_host = 3
self.assertEqual('/gpu:0', self.subject.get_device_name_and_set_cuda_env(
- True, gpus_per_host, seg_id ))
+ gpus_per_host, seg_id ))
self.assertEqual('0,1,2', os.environ['CUDA_VISIBLE_DEVICES'])
gpus_per_host = 0
self.assertEqual('/cpu:0', self.subject.get_device_name_and_set_cuda_env(
- False, gpus_per_host, seg_id ))
+ gpus_per_host, seg_id ))
self.assertEqual('-1', os.environ['CUDA_VISIBLE_DEVICES'])
def test_get_device_name_and_set_cuda_env_gpdb(self):
@@ -765,12 +766,12 @@ class MadlibKerasWrapperTestCase(unittest.TestCase):
seg_id=3
gpus_per_host=2
self.assertEqual('/gpu:0', self.subject.get_device_name_and_set_cuda_env(
- True, gpus_per_host, seg_id))
+ gpus_per_host, seg_id))
self.assertEqual('1', os.environ['CUDA_VISIBLE_DEVICES'])
gpus_per_host=0
self.assertEqual('/cpu:0', self.subject.get_device_name_and_set_cuda_env(
- False, gpus_per_host, seg_id))
+ gpus_per_host, seg_id))
self.assertEqual('-1', os.environ['CUDA_VISIBLE_DEVICES'])