You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2018/04/04 20:50:40 UTC
[1/2] madlib git commit: MLP: Allow one-hot encoded dependent var for
classification
Repository: madlib
Updated Branches:
refs/heads/master ab7166ff4 -> 1670923bf
MLP: Allow one-hot encoded dependent var for classification
JIRA:MADLIB-1222
MLP currently one-hot encodes categorical dependent variables
automatically for classification, but mlp_classification does not accept
a dependent variable that is already an encoded array. This commit lets
users train a model with an already one-hot encoded array as the
dependent variable.
Closes #250
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/29fe759a
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/29fe759a
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/29fe759a
Branch: refs/heads/master
Commit: 29fe759ac2511024f1afab1f7da767e8a62ffd8c
Parents: ab7166f
Author: Nandish Jayaram <nj...@apache.org>
Authored: Tue Mar 20 15:43:25 2018 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Tue Apr 3 10:59:10 2018 -0700
----------------------------------------------------------------------
src/modules/convex/mlp_igd.cpp | 6 +-
src/modules/convex/task/mlp.hpp | 20 ++-
src/ports/postgres/modules/convex/mlp.sql_in | 3 +-
src/ports/postgres/modules/convex/mlp_igd.py_in | 146 ++++++++++++-------
.../postgres/modules/convex/test/mlp.sql_in | 48 +++++-
5 files changed, 156 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/29fe759a/src/modules/convex/mlp_igd.cpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/mlp_igd.cpp b/src/modules/convex/mlp_igd.cpp
index b8738d4..e914c41 100644
--- a/src/modules/convex/mlp_igd.cpp
+++ b/src/modules/convex/mlp_igd.cpp
@@ -384,7 +384,8 @@ internal_predict_mlp::run(AnyType &args) {
size_t numberOfStages = layerSizes.size()-1;
double is_classification = args[2].getAs<double>();
double activation = args[3].getAs<double>();
- bool get_class = is_classification && is_response;
+ int is_dep_var_array_for_classification = args[8].getAs<int>();
+ bool is_classification_response = is_classification && is_response;
model.rebind(&is_classification, &activation, &coeff.data()[0],
numberOfStages, &layerSizes.data()[0]);
@@ -393,7 +394,8 @@ internal_predict_mlp::run(AnyType &args) {
} catch (const ArrayWithNullException &e) {
return args[0];
}
- ColumnVector prediction = MLPTask::predict(model, indVar, get_class);
+ ColumnVector prediction = MLPTask::predict(model, indVar, is_classification_response,
+ is_dep_var_array_for_classification);
return prediction;
}
http://git-wip-us.apache.org/repos/asf/madlib/blob/29fe759a/src/modules/convex/task/mlp.hpp
----------------------------------------------------------------------
diff --git a/src/modules/convex/task/mlp.hpp b/src/modules/convex/task/mlp.hpp
index 8a68aaa..adf87a7 100644
--- a/src/modules/convex/task/mlp.hpp
+++ b/src/modules/convex/task/mlp.hpp
@@ -66,7 +66,8 @@ public:
static ColumnVector predict(
const model_type &model,
const independent_variables_type &x,
- const bool get_class);
+ const bool is_classification_response,
+ const bool is_dep_var_array_for_classification);
const static int RELU = 0;
const static int SIGMOID = 1;
@@ -219,17 +220,26 @@ ColumnVector
MLP<Model, Tuple>::predict(
const model_type &model,
const independent_variables_type &x,
- const bool get_class) {
+ const bool is_classification_response,
+ const bool is_dep_var_array_for_classification) {
std::vector<ColumnVector> net, o;
feedForward(model, x, net, o);
ColumnVector output = o.back();
- if(get_class){ // Return a length 1 array with the predicted index
+ if(is_classification_response){
int max_idx;
output.maxCoeff(&max_idx);
- output.resize(1);
- output[0] = (double) max_idx;
+ if(is_dep_var_array_for_classification) {
+ // Return the entire array, but with 1 for the class level with
+ // largest probability and 0s for the rest.
+ output.setZero();
+ output[max_idx] = 1;
+ } else {
+ // Return a length 1 array with the predicted index
+ output.resize(1);
+ output[0] = (double) max_idx;
+ }
}
return output;
}
http://git-wip-us.apache.org/repos/asf/madlib/blob/29fe759a/src/ports/postgres/modules/convex/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp.sql_in b/src/ports/postgres/modules/convex/mlp.sql_in
index 739007e..f153722 100644
--- a/src/ports/postgres/modules/convex/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/mlp.sql_in
@@ -1669,7 +1669,8 @@ CREATE FUNCTION MADLIB_SCHEMA.internal_predict_mlp(
layer_sizes DOUBLE PRECISION[],
is_response INTEGER,
x_means DOUBLE PRECISION[],
- x_stds DOUBLE PRECISION[]
+ x_stds DOUBLE PRECISION[],
+ array_dep_var_for_classification INTEGER
)
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
http://git-wip-us.apache.org/repos/asf/madlib/blob/29fe759a/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 4a1416c..800ec29 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -130,12 +130,14 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
num_input_nodes = get_col_dimension(source_table, independent_varname,
dim=2)
if is_classification:
- _assert(pp_summary_dict["class_values"],
- "MLP Error: The pre-processed table created using"
- " madlib.minibatch_preprocessor was probably run"
- " without casting depedent variable to ::TEXT.")
- classes = pp_summary_dict["class_values"]
- num_output_nodes = len(classes)
+ if pp_summary_dict["class_values"]:
+ classes = pp_summary_dict["class_values"]
+ num_output_nodes = len(classes)
+ else:
+ # Assume that the dependent variable is already one-hot-encoded
+ num_output_nodes = get_col_dimension(source_table,
+ dependent_varname,
+ dim=2)
else:
num_output_nodes = get_col_dimension(source_table,
dependent_varname, dim=2)
@@ -156,18 +158,27 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
dependent_type = get_expr_type(dependent_varname, source_table)
if is_classification:
- labels = plpy.execute("SELECT DISTINCT {0} FROM {1}".
- format(dependent_varname, source_table))
- num_output_nodes = len(labels)
- for label_obj in labels:
- label = _format_label(label_obj[dependent_varname])
- classes.append(label)
- classes.sort()
- level_vals_str = ','.join(["{0}={1}".format(
- col_dep_var_norm_new, str(c))
- for c in classes])
- # dependent_varname should be replaced with one-hot encoded varname
- dependent_varname = "ARRAY[{0}]::integer[]".format(level_vals_str)
+ # If dependent variable is an array during classification, assume
+ # that it is already one-hot-encoded.
+ if "[]" in dependent_type:
+ # We are now using tbl_data_scaled, so change the dependent
+ # varname accordingly.
+ dependent_varname = col_dep_var_norm_new
+ num_output_nodes = get_col_dimension(tbl_data_scaled,
+ dependent_varname)
+ else:
+ labels = plpy.execute("SELECT DISTINCT {0} FROM {1}".
+ format(dependent_varname, source_table))
+ num_output_nodes = len(labels)
+ for label_obj in labels:
+ label = _format_label(label_obj[dependent_varname])
+ classes.append(label)
+ classes.sort()
+ level_vals_str = ','.join(["{0}={1}".format(
+ col_dep_var_norm_new, str(c))
+ for c in classes])
+ # dependent_varname should be replaced with one-hot encoded varname
+ dependent_varname = "ARRAY[{0}]::integer[]".format(level_vals_str)
else:
if "[]" not in dependent_type:
dependent_varname = "ARRAY[" + col_dep_var_norm_new + "]"
@@ -665,15 +676,26 @@ def _validate_dependent_var(source_table, dependent_varname,
"Dependent variable column should be of numeric type.")
else:
if is_classification:
- # Currently, classification doesn't accept an
- # array for dep type in IGD
- _assert("[]" not in expr_type and expr_type in classification_types,
- "Dependent variable column should be of type: "
- "{0}".format(classification_types))
+ _assert(("[]" in expr_type \
+ and is_psql_numeric_type(expr_type[:-2]) \
+ and not _is_dep_var_multi_dim(dependent_varname, source_table) \
+ ) \
+ or expr_type in classification_types,
+ "Dependent variable column should either be a numeric 1-D"
+ " array, or be of type: {0}".format(classification_types))
else:
_assert("[]" in expr_type or is_psql_numeric_type(expr_type),
"Dependent variable column should be of numeric type.")
+def _is_dep_var_multi_dim(dependent_varname, source_table):
+ # Check if dependent variable is an array of two or higher dimension
+ dep_array_sec_dim = plpy.execute("""
+ SELECT array_upper({0}, 2) AS n_y
+ FROM {1}
+ LIMIT 1
+ """.format(dependent_varname, source_table))
+ return bool(dep_array_sec_dim[0]['n_y'])
+
def _validate_params_based_on_minibatch(source_table, independent_varname,
dependent_varname, weights,
is_classification,
@@ -856,8 +878,16 @@ def mlp_predict(schema_madlib, model_table, data_table, id_col_name,
activation = _get_activation_index(summary['activation'])
layer_sizes = PY2SQL(
summary['layer_sizes'], array_type="DOUBLE PRECISION")
- is_classification = int(summary["is_classification"])
is_response = int(pred_type == 'response')
+ is_classification = int(summary["is_classification"])
+ classes = summary['classes']
+ # Set a flag to indicate that it is a classification model, with an array
+ # as the dependent var. The only scenario where classification allows for
+ # an array dep var is when the user has provided a one-hot encoded dep var
+ # during training, and mlp_classification does not one-hot encode
+ # (and hence classes column in model's summary table is NULL).
+ is_dep_var_an_array_for_classification = int(is_classification and not classes)
+
# Fix to ensure that 1.12 models run on 1.13 or higher.
# As a result of adding grouping support in 1.13, some changes were
# made wrt standardization.
@@ -922,7 +952,6 @@ def mlp_predict(schema_madlib, model_table, data_table, id_col_name,
else:
# if not grouping, then directly read out the coeff, mean
# and std values from the model and standardization tables.
-
if is_pre_113_model:
# Get mean and std from the summary table
standardization = plpy.execute("""
@@ -952,37 +981,22 @@ def mlp_predict(schema_madlib, model_table, data_table, id_col_name,
{layer_sizes},
{is_response},
{mean_col},
- {std_col}
+ {std_col},
+ {is_dep_var_an_array_for_classification}
)
""".format(**locals())
- if not is_classification:
- dependent_type = get_expr_type(dependent_varname, source_table)
- unnest_if_not_array = ""
- # Return the same type as the user provided. Internally we always
- # use an array, but if they provided a scalar, unnest it for
- # the user
- if "[]" not in dependent_type:
- unnest_if_not_array = "UNNEST"
- sql = header + """
- SELECT {grouping_col_comma}
- {id_col_name},
- {unnest_if_not_array}({predict_uda_query}) AS {pred_name}
- FROM {data_table}
- {join_str}
- {group_by_predict_str}
- """
- else:
- summary_query = """
- SELECT classes FROM {0}
- """.format(summary_table)
- classes = plpy.execute(summary_query)[0]['classes']
+ if is_classification:
if pred_type == "response":
- classes_with_index_table = unique_string()
- classes_table = unique_string()
+ if classes:
+ prediction_select_clause = "(ARRAY{0})[pred_idx[1]+1] AS {1}".format(classes, pred_name)
+ else:
+ # Case when the training step did not have to one-hot encode
+ # the dependent var.
+ prediction_select_clause = "pred_idx AS {0}".format(pred_name)
sql = header + """
SELECT {select_grouping_col}
q.{id_col_name},
- (ARRAY{classes})[pred_idx[1]+1] as {pred_name}
+ {prediction_select_clause}
FROM (
SELECT {grouping_col_comma}
{id_col_name},
@@ -994,11 +1008,16 @@ def mlp_predict(schema_madlib, model_table, data_table, id_col_name,
"""
else:
intermediate_col = unique_string()
- score_format = ',\n'.join([
- 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
- format(j=i + 1, c_str=str(c).strip(' "'),
- interim=intermediate_col)
- for i, c in enumerate(classes)])
+ if classes:
+ score_format = ',\n'.join([
+ 'CAST({interim}[{j}] as DOUBLE PRECISION) as "estimated_prob_{c_str}"'.
+ format(j=i + 1, c_str=str(c).strip(' "'),
+ interim=intermediate_col)
+ for i, c in enumerate(classes)])
+ else:
+ # Case when the training step did not have to one-hot encode
+ # the dependent var.
+ score_format = '{0} AS estimated_prob'.format(intermediate_col)
sql = header + """
SELECT {select_grouping_col}
{id_col_name},
@@ -1012,6 +1031,23 @@ def mlp_predict(schema_madlib, model_table, data_table, id_col_name,
{group_by_predict_str}
) q
"""
+ else:
+ # Regression
+ dependent_type = get_expr_type(dependent_varname, source_table)
+ unnest_if_not_array = ""
+ # Return the same type as the user provided. Internally we always
+ # use an array, but if they provided a scalar, unnest it for
+ # the user
+ if "[]" not in dependent_type:
+ unnest_if_not_array = "UNNEST"
+ sql = header + """
+ SELECT {grouping_col_comma}
+ {id_col_name},
+ {unnest_if_not_array}({predict_uda_query}) AS {pred_name}
+ FROM {data_table}
+ {join_str}
+ {group_by_predict_str}
+ """
sql = sql.format(**locals())
plpy.execute(sql)
http://git-wip-us.apache.org/repos/asf/madlib/blob/29fe759a/src/ports/postgres/modules/convex/test/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in
index da41172..8a2c92b 100644
--- a/src/ports/postgres/modules/convex/test/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/test/mlp.sql_in
@@ -215,7 +215,6 @@ CREATE TABLE iris_data_batch_summary(
);
INSERT INTO iris_data_batch_summary VALUES
('iris_data','iris_data_batch','class::TEXT','attributes',30,ARRAY[1,2,3],141,0,'grp');
-
-- Create the corresponding standardization table for preprocessed data
CREATE TABLE iris_data_batch_standardization(
grp text,
@@ -227,7 +226,6 @@ INSERT INTO iris_data_batch_standardization VALUES
('1',ARRAY[5.74893617021,3.02482269504,3.6865248227,1.18014184397],ARRAY[0.785472439601,0.396287027644,1.68671151195,0.750245336531]),
('2',ARRAY[5.74893617021,3.02482269504,3.6865248227,1.18014184397],ARRAY[0.785472439601,0.396287027644,1.68671151195,0.750245336531]);
--- without minibatch without grouping and without warm start
DROP TABLE IF EXISTS mlp_class, mlp_class_summary, mlp_class_standardization;
SELECT mlp_classification(
'iris_data', -- Source table
@@ -246,7 +244,7 @@ SELECT mlp_classification(
False,
'grp'
);
-DROP TABLE IF EXISTS mlp_prediction_output, mlp_prediction_output_summary, mlp_prediction_output_standardization;;
+DROP TABLE IF EXISTS mlp_prediction_output;
SELECT mlp_predict(
'mlp_class',
'iris_data',
@@ -301,7 +299,9 @@ SELECT mlp_classification(
False,
'grp'
);
-DROP TABLE IF EXISTS mlp_prediction_batch_output, mlp_prediction_batch_output_summary, mlp_prediction_batch_output_standardization;
+
+DROP TABLE IF EXISTS mlp_prediction_batch_output, mlp_prediction_output;
+-- See prediction accuracy for training data
SELECT mlp_predict(
'mlp_class_batch',
'iris_data',
@@ -336,6 +336,46 @@ SELECT mlp_predict(
'response');
------------------------------------------------ Regression ------------------------------------------------------------
+DROP TABLE IF EXISTS mlp_class_batch, mlp_class_batch_summary, mlp_class_batch_standardization;
+
+-- Set class_values column value to NULL so that encoding info is not captured, to test
+-- case where dependent variable is an array for classification.
+UPDATE iris_data_batch_summary SET class_values = NULL WHERE source_table='iris_data';
+SELECT mlp_classification(
+ 'iris_data_batch', -- Source table
+ 'mlp_class_batch', -- Desination table
+ 'independent_varname', -- Input features
+ 'dependent_varname', -- Label
+ ARRAY[5], -- Number of units per layer
+ 'learning_rate_init=0.1,
+ learning_rate_policy=constant,
+ n_iterations=5,
+ tolerance=0,
+ n_epochs=20',
+ 'sigmoid',
+ '',
+ False,
+ False
+);
+DROP TABLE IF EXISTS mlp_prediction_batch_output, mlp_prediction_output;
+SELECT mlp_predict(
+ 'mlp_class_batch',
+ 'iris_data',
+ 'id',
+ 'mlp_prediction_batch_output',
+ 'response');
+SELECT * FROM mlp_prediction_batch_output;
+DROP TABLE IF EXISTS mlp_prediction_batch_output;
+SELECT mlp_predict(
+ 'mlp_class_batch',
+ 'iris_data',
+ 'id',
+ 'mlp_prediction_batch_output',
+ 'prob');
+SELECT * FROM mlp_prediction_batch_output;
+DROP TABLE IF EXISTS mlp_prediction_batch_output;
+DROP TABLE IF EXISTS mlp_class_batch, mlp_class_batch_summary, mlp_class_batch_standardization;
+
DROP TABLE IF EXISTS lin_housing_wi CASCADE;
CREATE TABLE lin_housing_wi (id serial, x float8[], grp int, y float8);
[2/2] madlib git commit: MLP: Remove minibatch training dependency on
original source
Posted by nj...@apache.org.
MLP: Remove minibatch training dependency on original source
The original source table used by the minibatch preprocessor is recorded
in a column named 'original_source_table' in the summary table. Ideally,
this table does not need to exist once the preprocessed table has been
created. However, the MLP training code depended on it and would fail if
that table had been deleted. This commit removes that dependency.
Co-authored-by: Nikhil Kak <nk...@pivotal.io>
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/1670923b
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/1670923b
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/1670923b
Branch: refs/heads/master
Commit: 1670923bf155395caf51cd7b07edbe9533a6908b
Parents: 29fe759
Author: Nandish Jayaram <nj...@apache.org>
Authored: Tue Apr 3 14:32:03 2018 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Tue Apr 3 17:29:11 2018 -0700
----------------------------------------------------------------------
src/ports/postgres/modules/convex/mlp_igd.py_in | 22 ++++++++++++++------
.../postgres/modules/convex/test/mlp.sql_in | 9 +++++++-
2 files changed, 24 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/1670923b/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 800ec29..687011c 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -141,9 +141,14 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
else:
num_output_nodes = get_col_dimension(source_table,
dependent_varname, dim=2)
- # Get the type of the original source table's dependent variable column.
- dependent_type = get_expr_type(pp_summary_dict['dependent_varname'],
- pp_summary_dict['source_table'])
+
+ # This variable is used for creating the classes_str column in the model
+ # summary table. We append [] when we create this column in the create
+ # summary table command so we need to strip it out here.
+ dependent_type = get_expr_type(mlp_preprocessor.CLASS_VALUES,
+ mlp_preprocessor.summary_table)
+ if dependent_type[-2:] == '[]':
+ dependent_type = dependent_type[:-2]
else:
x_mean_table = unique_string(desp='x_mean_table')
tbl_data_scaled = unique_string(desp="tbl_data_scaled")
@@ -184,6 +189,7 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
dependent_varname = "ARRAY[" + col_dep_var_norm_new + "]"
num_output_nodes = get_col_dimension(tbl_data_scaled,
dependent_varname, dim=1)
+
# Need layers sizes before validating for warm_start
layer_sizes = [num_input_nodes] + hidden_layer_sizes + [num_output_nodes]
col_grp_key = unique_string(desp='col_grp_key')
@@ -451,8 +457,10 @@ def _create_summary_table(args):
if args['warm_start']:
plpy.execute("DROP TABLE IF EXISTS {0}".format(args['summary_table']))
+
classes_str = PY2SQL([strip_end_quotes(cl, "'") for cl in args['classes']],
array_type=args['dependent_type'])
+
minibatch_summary_col_names = ''
minibatch_summary_col_vals = ''
if args['is_minibatch_enabled']:
@@ -678,7 +686,7 @@ def _validate_dependent_var(source_table, dependent_varname,
if is_classification:
_assert(("[]" in expr_type \
and is_psql_numeric_type(expr_type[:-2]) \
- and not _is_dep_var_multi_dim(dependent_varname, source_table) \
+ and not _get_dep_var_second_dim(dependent_varname, source_table) \
) \
or expr_type in classification_types,
"Dependent variable column should either be a numeric 1-D"
@@ -687,14 +695,16 @@ def _validate_dependent_var(source_table, dependent_varname,
_assert("[]" in expr_type or is_psql_numeric_type(expr_type),
"Dependent variable column should be of numeric type.")
-def _is_dep_var_multi_dim(dependent_varname, source_table):
+def _get_dep_var_second_dim(dependent_varname, source_table):
# Check if dependent variable is an array of two or higher dimension
+ # Return the value of the second dimension, or None if the array has fewer
+ # than two dimensions.
dep_array_sec_dim = plpy.execute("""
SELECT array_upper({0}, 2) AS n_y
FROM {1}
LIMIT 1
""".format(dependent_varname, source_table))
- return bool(dep_array_sec_dim[0]['n_y'])
+ return dep_array_sec_dim[0]['n_y']
def _validate_params_based_on_minibatch(source_table, independent_varname,
dependent_varname, weights,
http://git-wip-us.apache.org/repos/asf/madlib/blob/1670923b/src/ports/postgres/modules/convex/test/mlp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in
index 8a2c92b..a40d35a 100644
--- a/src/ports/postgres/modules/convex/test/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/test/mlp.sql_in
@@ -213,8 +213,15 @@ CREATE TABLE iris_data_batch_summary(
num_rows_skipped integer,
grouping_cols text
);
+-- The availability of the original source table should not be a condition for
+-- MLP to work correctly. It should work fine even if the original source table is
+-- deleted (this basically ensures that all the necessary info is captured in
+-- the summary table). So name the original source table as
+-- 'iris_data_does_not_exist' instead of the original 'iris_data', to mimic the
+-- scenario where the original source table is deleted and MLP is trained with
+-- the preprocessed table.
INSERT INTO iris_data_batch_summary VALUES
-('iris_data','iris_data_batch','class::TEXT','attributes',30,ARRAY[1,2,3],141,0,'grp');
+('iris_data_does_not_exist','iris_data_batch','class::TEXT','attributes',30,ARRAY[1,2,3],141,0,'grp');
-- Create the corresponding standardization table for preprocessed data
CREATE TABLE iris_data_batch_standardization(
grp text,