You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@madlib.apache.org by nk...@apache.org on 2020/03/24 20:06:31 UTC

[madlib] branch master updated (41ace0b -> 01ede7c)

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git.


    from 41ace0b  DL: Don't include weights as part of state except for the last row.
     new 5a95e7c  Utilities: Improve rename table logic
     new ef763f1  Graph: Add select statements to graph tests
     new 4f62f4b  DL: Fix disk issue by using truncate guc
     new ec3eee7  DL: add tests for asserting that guc values are unchanged
     new 01ede7c  DL: Use unlogged tables for fit multiple

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../madlib_keras_fit_multiple_model.py_in          | 254 +++++++++++++++------
 .../madlib_keras_fit_multiple_model.sql_in         |   7 +
 .../deep_learning/test/madlib_keras_fit.sql_in     |   1 -
 .../test/madlib_keras_model_averaging_e2e.sql_in   |   7 +-
 .../test/madlib_keras_model_selection.sql_in       |  35 +++
 .../test/madlib_keras_model_selection_e2e.sql_in   |  25 +-
 .../test/madlib_keras_transfer_learning.sql_in     |  76 +++---
 src/ports/postgres/modules/graph/hits.py_in        |   2 +-
 src/ports/postgres/modules/graph/pagerank.py_in    |   2 +-
 src/ports/postgres/modules/graph/test/bfs.sql_in   |   2 +
 src/ports/postgres/modules/graph/test/hits.sql_in  |   2 +
 .../postgres/modules/graph/test/pagerank.sql_in    |   2 +
 src/ports/postgres/modules/graph/test/sssp.sql_in  |   1 +
 src/ports/postgres/modules/graph/test/wcc.sql_in   |   2 +
 src/ports/postgres/modules/graph/wcc.py_in         |   2 +-
 src/ports/postgres/modules/linalg/svd.py_in        |   2 +-
 .../modules/regress/clustered_variance.py_in       |   2 +-
 .../postgres/modules/regress/robust_logistic.py_in |   2 +-
 .../modules/regress/robust_mlogistic.py_in         |   2 +-
 src/ports/postgres/modules/utilities/control.py_in |   3 +-
 .../utilities/test/unit_tests/test_control.py_in   |  12 +-
 .../utilities/test/unit_tests/test_utilities.py_in |  28 +++
 .../postgres/modules/utilities/utilities.py_in     | 124 ++++++++++
 .../postgres/modules/utilities/utilities.sql_in    |  50 ++++
 .../postgres/modules/utilities/validate_args.py_in |  65 ------
 25 files changed, 520 insertions(+), 190 deletions(-)

[madlib] 01/05: Utilities: Improve rename table logic

Posted by nk...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 5a95e7c93b4effed6191f508624d4dc7333f9987
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Mar 18 17:23:12 2020 -0700

    Utilities: Improve rename table logic
    
    JIRA: MADLIB-1406
    
    Consider the example
    "rename table foo to bar.foo2"
    
    Previously rename_table would fail if the table 'foo2' already existed on the
    search_path. This is because we first renamed the table foo to foo2 and then
    set the schema to bar.
    
    The new logic fixes this by renaming foo to a unique string, setting the schema
    to bar and then renaming the unique string to bar
    
    Also moved the function to utilities since the new logic required the use of
    unique_string. Also it seemed better suited to utilities.py instead of
    validate_args.py
    
    Co-authored-by: Domino Valdano <dv...@pivotal.io>
    Co-authored-by: Ekta Khanna <ek...@pivotal.io>
---
 src/ports/postgres/modules/graph/hits.py_in        |   2 +-
 src/ports/postgres/modules/graph/pagerank.py_in    |   2 +-
 src/ports/postgres/modules/graph/wcc.py_in         |   2 +-
 src/ports/postgres/modules/linalg/svd.py_in        |   2 +-
 .../modules/regress/clustered_variance.py_in       |   2 +-
 .../postgres/modules/regress/robust_logistic.py_in |   2 +-
 .../modules/regress/robust_mlogistic.py_in         |   2 +-
 .../postgres/modules/utilities/utilities.py_in     | 114 +++++++++++++++++++++
 .../postgres/modules/utilities/validate_args.py_in |  65 ------------
 9 files changed, 121 insertions(+), 72 deletions(-)

diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index f792f15..1283070 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -47,7 +47,7 @@ from utilities.utilities import is_platform_pg
 
 from utilities.validate_args import columns_exist_in_table, drop_tables
 from utilities.validate_args import get_cols_and_types, table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table,
                        edge_params, out_table, max_iter, threshold,
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index 41f25de..e732675 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -49,7 +49,7 @@ from utilities.utilities import py_list_to_sql_string
 
 from utilities.validate_args import columns_exist_in_table, get_cols_and_types
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 
 def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 4adc52e..28b06f5 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -37,7 +37,7 @@ from utilities.validate_args import columns_exist_in_table, get_expr_type
 from utilities.utilities import is_platform_pg
 from utilities.utilities import add_postfix
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.control import MinWarning
 from graph_utils import validate_graph_coding, get_graph_usage
 from graph_utils import validate_output_and_summary_tables
diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in
index 894fae2..bf0e522 100644
--- a/src/ports/postgres/modules/linalg/svd.py_in
+++ b/src/ports/postgres/modules/linalg/svd.py_in
@@ -11,7 +11,7 @@ from utilities.utilities import _assert
 from utilities.utilities import add_postfix
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from matrix_ops import get_dims
 from matrix_ops import validate_sparse
 from matrix_ops import validate_dense
diff --git a/src/ports/postgres/modules/regress/clustered_variance.py_in b/src/ports/postgres/modules/regress/clustered_variance.py_in
index 44b042d..8931066 100644
--- a/src/ports/postgres/modules/regress/clustered_variance.py_in
+++ b/src/ports/postgres/modules/regress/clustered_variance.py_in
@@ -8,7 +8,7 @@ from utilities.utilities import unique_string
 from utilities.utilities import _string_to_array_with_quotes
 from utilities.utilities import extract_keyvalue_params
 from utilities.validate_args import columns_exist_in_table
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.validate_args import table_is_empty
 from utilities.validate_args import table_exists
 from utilities.utilities import _assert
diff --git a/src/ports/postgres/modules/regress/robust_logistic.py_in b/src/ports/postgres/modules/regress/robust_logistic.py_in
index 189123b..4bc72a4 100644
--- a/src/ports/postgres/modules/regress/robust_logistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_logistic.py_in
@@ -11,7 +11,7 @@
 import plpy
 from utilities.utilities import _assert
 from utilities.utilities import unique_string
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 # use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0
 from utilities.utilities import __mad_version
diff --git a/src/ports/postgres/modules/regress/robust_mlogistic.py_in b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
index 897b849..ffabd37 100644
--- a/src/ports/postgres/modules/regress/robust_mlogistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
@@ -15,7 +15,7 @@ from utilities.utilities import _assert
 from utilities.utilities import add_postfix
 
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.validate_args import columns_exist_in_table
 
 from regress.robust_linear import _robust_linregr_validate
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 210fbd4..73ff894 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -15,6 +15,7 @@ from validate_args import input_tbl_valid
 from validate_args import is_var_valid
 from validate_args import output_tbl_valid
 from validate_args import quote_ident
+from validate_args import drop_tables
 import plpy
 
 
@@ -1162,3 +1163,116 @@ def rotate(l, n):
     """
     return l[-n:] + l[:-n]
 # ------------------------------------------------------------------------------
+
+def rename_table(schema_madlib, orig_name, new_name):
+    """
+    Renames possibly schema qualified table name to a new schema qualified name
+    ensuring the schema qualification are changed appropriately
+
+    Args:
+        @param orig_name: string, Original name of the table
+                          (must be schema qualified if table schema is not in search path)
+        @param new_name: string, New name of the table
+                          (can be schema qualified. If it is not then the original
+                           schema is maintained)
+    Returns:
+        String. The new table name qualified with the schema name
+    """
+    new_names_split = new_name.split(".")
+    if len(new_names_split) > 2:
+        raise AssertionError("Invalid table name")
+    new_table_name = new_names_split[-1]
+    new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
+
+    orig_names_split = orig_name.split(".")
+    if len(orig_names_split) > 2:
+        raise AssertionError("Invalid table name")
+
+    if len(orig_names_split) > 1:
+        orig_table_schema = orig_names_split[0]
+    else:
+        # we need to get the schema name of the original table if we are
+        # to change the schema of the new table. This is to ensure that we
+        # change the schema of the correct table in case there are multiple
+        # tables with the same new name.
+        orig_table_schema = get_first_schema(orig_name)
+
+    if orig_table_schema is None:
+        raise AssertionError("Relation {0} not found during rename".
+                             format(orig_name))
+    return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+                                        new_table_schema, new_table_name)
+
+def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+                                 new_table_schema, new_table_name):
+    """
+    Internal private function to perform the rename operation after all the
+    validation checks
+    """
+
+    """
+    CASE 1
+    If the output table is schema is pg_temp, we cannot alter table schemas from/to
+    temp schemas. If it looks like a temp schema, we stay safe and just use
+    create/drop
+        Test cases
+        foo.bar to pg_temp.bar
+        foo.bar to pg_temp.bar2
+        foo to pg_temp.bar
+        pg_temp.foo to pg_temp.bar
+    """
+    if new_table_schema and 'pg_temp' in new_table_schema:
+        """
+        If both new_table_schema and orig_table_schema have pg_temp in it,
+        just run an alter statement instead of CTAS. Without this, pca dev-check
+        fails on gpdb5/6 (but not on pg)
+        """
+        if new_table_schema != orig_table_schema:
+            plpy.info("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+                      .format(**locals()))
+            plpy.execute("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+                         .format(**locals()))
+            drop_tables([orig_name])
+            return new_name
+        else:
+            plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+                         format(**locals()))
+            return new_name
+
+    """
+    CASE 2
+    Do direct rename if the new table does not have an output schema or
+    if the new table schema is the same as the original table schema
+    Test Cases
+    rename foo to bar
+    rename foo.bar to foo.bar2
+    rename foo.bar to bar2
+    """
+    if not new_table_schema or new_table_schema == orig_table_schema:
+        plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+                     format(**locals()))
+        return orig_table_schema + "." + new_table_name
+
+    """
+    CASE 3
+    output table is schema qualified
+    1. rename the original table to an interim temp name
+    2. set the new schema on that interim table
+    3. rename interim table to the new table name
+    Test cases
+    foo.bar to foo2.bar2
+    foo.bar to foo2.bar
+    """
+    interim_temp_name = unique_string("rename_table_helper")
+    plpy.execute(
+        "ALTER TABLE {orig_name} RENAME to {interim_temp_name}".format(
+            **locals()))
+
+    plpy.execute(
+        """ALTER TABLE {interim_temp_name} SET SCHEMA {new_table_schema}""".format(
+            **locals()))
+
+    plpy.execute(
+        """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}"""
+        .format(**locals()))
+    return new_name
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index ac6bd4a..a21be58 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -164,71 +164,6 @@ def table_exists(tbl, only_first_schema=False):
 # -------------------------------------------------------------------------
 
 
-def rename_table(schema_madlib, orig_name, new_name):
-    """
-    Renames possibly schema qualified table name to a new schema qualified name
-    ensuring the schema qualification are changed appropriately
-
-    Args:
-        @param orig_name: string, Original name of the table
-                          (must be schema qualified if table schema is not in search path)
-        @param new_name: string, New name of the table
-                          (can be schema qualified. If it is not then the original
-                           schema is maintained)
-    Returns:
-        String. The new table name qualified with the schema name
-    """
-    new_names_split = new_name.split(".")
-    if len(new_names_split) > 2:
-        raise AssertionError("Invalid table name")
-    new_table_name = new_names_split[-1]
-    new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
-
-    orig_names_split = orig_name.split(".")
-    if len(orig_names_split) > 2:
-        raise AssertionError("Invalid table name")
-
-    if len(orig_names_split) > 1:
-        orig_table_schema = orig_names_split[0]
-    else:
-        # we need to get the schema name of the original table if we are
-        # to change the schema of the new table. This is to ensure that we
-        # change the schema of the correct table in case there are multiple
-        # tables with the same new name.
-        orig_table_schema = get_first_schema(orig_name)
-
-    if orig_table_schema is None:
-        raise AssertionError("Relation {0} not found during rename".
-                             format(orig_name))
-
-    plpy.execute("ALTER TABLE {orig_table} RENAME TO {new_table}".
-                 format(orig_table=orig_name, new_table=new_table_name))
-
-    if new_table_schema:
-        if new_table_schema != orig_table_schema:
-            # set schema only if a change in schema is required
-            before_schema_string = "{0}.{1}".format(orig_table_schema,
-                                                    new_table_name)
-
-            if 'pg_temp' in new_table_schema:
-                # We cannot alter table schemas from/to temp schemas
-                # If it looks like a temp schema, we stay safe and just use
-                # create/drop
-                plpy.execute("""CREATE TABLE {new_name} AS
-                                SELECT * FROM {new_table_name}""".
-                             format(**locals()))
-                drop_tables([new_table_name])
-            else:
-                plpy.execute("""ALTER TABLE {new_table}
-                                SET SCHEMA {schema_name}""".
-                             format(new_table=before_schema_string,
-                                    schema_name=new_table_schema))
-        return new_name
-    else:
-        return orig_table_schema + "." + new_table_name
-# -------------------------------------------------------------------------
-
-
 def get_first_schema(table_name):
     """
     Return first schema name from search path that contains given table.

[madlib] 02/05: Graph: Add select statements to graph tests

Posted by nk...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit ef763f15c4f008a3ec9cf69b9898bf2918d4f9cf
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Mar 18 17:23:42 2020 -0700

    Graph: Add select statements to graph tests
    
    JIRA: MADLIB-1411
    
    A previous commit https://github.com/apache/madlib/commit/5e05f25e5ab93ff56121336c976523e248876e0a added tests for schema qualified output tables for all graph functions. This commit adds a select statement for the output tables to make sure that the right tables were created.
---
 src/ports/postgres/modules/graph/test/bfs.sql_in      | 2 ++
 src/ports/postgres/modules/graph/test/hits.sql_in     | 2 ++
 src/ports/postgres/modules/graph/test/pagerank.sql_in | 2 ++
 src/ports/postgres/modules/graph/test/sssp.sql_in     | 1 +
 src/ports/postgres/modules/graph/test/wcc.sql_in      | 2 ++
 5 files changed, 9 insertions(+)

diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in
index 7d04a86..46f97fb 100644
--- a/src/ports/postgres/modules/graph/test/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -296,6 +296,8 @@ CREATE TABLE e2 AS SELECT "SRC"::bigint, dest::bigint, weight FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT graph_bfs('v2',NULL,'e2','src="SRC"',3,'pg_temp.out2');
+SELECT count(*) from pg_temp.out2;
+SELECT * from pg_temp.out2_summary;
 
 -- Test for infinite paths
 DROP TABLE IF EXISTS out, out_summary, out_path;
diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in b/src/ports/postgres/modules/graph/test/hits.sql_in
index 5e474c6..2ef4d63 100644
--- a/src/ports/postgres/modules/graph/test/hits.sql_in
+++ b/src/ports/postgres/modules/graph/test/hits.sql_in
@@ -178,3 +178,5 @@ CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM edge;
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT hits('v2',NULL,'e2',NULL,'pg_temp.out2');
+SELECT count(*) from pg_temp.out2;
+SELECT * from pg_temp.out2_summary;
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index c560d4a..22794e9 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -214,3 +214,5 @@ CREATE TABLE e2 AS SELECT src::bigint, dest::bigint FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT pagerank('v2',NULL,'e2',NULL,'pg_temp.out2');
+SELECT count(*) from pg_temp.out2;
+SELECT * from pg_temp.out2_summary;
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in
index af0eb48..1a01c55 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -163,6 +163,7 @@ CREATE TABLE e2 AS SELECT src::bigint, dest::bigint, weight FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary, pg_temp.out2_path;
 SELECT graph_sssp('v2',NULL,'e2',NULL,0,'pg_temp.out2');
+SELECT count(*) from pg_temp.out2;
 SELECT graph_sssp_get_path('pg_temp.out2',5,'pg_temp.out2_path');
 
 -- Test for infinite paths
diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in
index 7e495cf..6dc5e7f 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in
@@ -181,3 +181,5 @@ CREATE TABLE e2 AS SELECT src_node::bigint, dest_node::bigint FROM "EDGE";
 
 DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT weakly_connected_components('v2',NULL,'e2','src=src_node,dest=dest_node','pg_temp.out2');
+SELECT count(*) from pg_temp.out2;
+SELECT count(*) from pg_temp.out2_summary;

[madlib] 04/05: DL: add tests for asserting that guc values are unchanged

Posted by nk...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit ec3eee7a9bd428bbed18f82b37514c3ae717a361
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Thu Mar 19 12:27:11 2020 -0700

    DL: add tests for asserting that guc values are unchanged
    
    JIRA: MADLIB-1406
    
    This commit adds tests for making sure that the gpdb gucs that we use for
    constant folding and subtransaction truncate get reset after calling the
    respective fit functions.
    
    1. Added a few helper functions to check for gpdb and pg versions.
    2. Also enabled model_averaging_e2e tests for pg
---
 .../deep_learning/test/madlib_keras_fit.sql_in     |  1 -
 .../test/madlib_keras_model_averaging_e2e.sql_in   |  7 ++---
 .../test/madlib_keras_model_selection.sql_in       |  2 +-
 .../test/madlib_keras_model_selection_e2e.sql_in   |  4 +++
 .../test/madlib_keras_transfer_learning.sql_in     |  3 +-
 .../utilities/test/unit_tests/test_utilities.py_in | 28 ++++++++++++++++++
 .../postgres/modules/utilities/utilities.py_in     |  7 ++++-
 .../postgres/modules/utilities/utilities.sql_in    | 33 ++++++++++++++++++++++
 8 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in
index 23ee058..a35eb6b 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_fit.sql_in
@@ -417,4 +417,3 @@ SELECT madlib_keras_fit(
     $$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
     $$ batch_size=2, epochs=1, verbose=0 $$::text,
     3);
-
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
index bbb757b..20e2332 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_averaging_e2e.sql_in
@@ -26,9 +26,6 @@ m4_include(`SQLCommon.m4')
              `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
 )
 
-m4_changequote(`<!', `!>')
-m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!
--- Multiple models End-to-End test
 DROP TABLE if exists pg_temp.iris_model, pg_temp.iris_model_summary;
 SELECT madlib_keras_fit(
 	'iris_data_packed',
@@ -40,6 +37,7 @@ SELECT madlib_keras_fit(
 	3,
 	FALSE
 );
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
 
 SELECT assert(
         model_arch_table = 'iris_model_arch' AND
@@ -69,6 +67,7 @@ SELECT madlib_keras_predict(
     'pg_temp.iris_predict',
     'prob',
     FALSE);
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
 
 -- Run Evaluate
 DROP TABLE IF EXISTS pg_temp.evaluate_out;
@@ -82,6 +81,7 @@ SELECT assert(loss >= 0 AND
         metric >= 0 AND
         metrics_type = '{accuracy}', 'Evaluate output validation failed.  Actual:' || __to_char(evaluate_out))
 FROM pg_temp.evaluate_out;
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
 
 -- Test for one-hot encoded user input data
 DROP TABLE if exists iris_model, iris_model_summary, iris_model_info;
@@ -137,4 +137,3 @@ SELECT assert(loss >= 0 AND
         metric >= 0 AND
         metrics_type = '{accuracy}', 'Evaluate output validation failed.  Actual:' || __to_char(evaluate_out))
 FROM evaluate_out;
-!>)
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index d101135..391beb7 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -349,7 +349,7 @@ SELECT madlib_keras_fit_multiple_model(
 -- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off'
 -- but we change it to 'on' in fit_multiple.py. Assert that the value is
 -- reset after calling fit_multiple
-select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off');
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off') END;
 
 SELECT assert(COUNT(*)=4, 'Info table must have exactly same rows as the number of msts.')
 FROM iris_multiple_model_info;
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
index 818a013..d738f48 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
@@ -112,6 +112,8 @@ SELECT madlib_keras_fit_multiple_model(
 	FALSE
 );
 
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
+
 SELECT assert(
         model_arch_table = 'iris_model_arch' AND
         validation_table is NULL AND
@@ -142,6 +144,7 @@ SELECT madlib_keras_predict(
     'prob',
     NULL,
     1);
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
 
 -- Run Evaluate
 DROP TABLE IF EXISTS evaluate_out;
@@ -151,6 +154,7 @@ SELECT madlib_keras_evaluate(
     'evaluate_out',
     NULL,
     1);
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('plan_cache_mode', 'auto') END;
 
 SELECT assert(loss >= 0 AND
         metric >= 0 AND
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
index c5c8a93..4334fa6 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
@@ -254,8 +254,7 @@ SELECT madlib_keras_fit_multiple_model(
 -- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off'
 -- but we change it to 'on' in fit_multiple.py. Assert that the value is
 -- reset after calling fit_multiple
-select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off');
-
+SELECT CASE WHEN is_ver_greater_than_gp_640_or_pg_11() is TRUE THEN assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off') END;
 
 SELECT assert(
   5 IN (SELECT mst_key FROM iris_multiple_model),
diff --git a/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in b/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in
index 0318516..9df924e 100644
--- a/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in
+++ b/src/ports/postgres/modules/utilities/test/unit_tests/test_utilities.py_in
@@ -378,5 +378,33 @@ class UtilitiesTestCase(unittest.TestCase):
                 self.py_list, self.sql_array_col, self.colname, self.coltype,
                 self.has_one_ele, "dummy_module")
 
+    def test_is_platform_gp6_or_up_input_gpdb6(self):
+
+        self.subject.is_platform_pg = Mock(return_value = False)
+
+        self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 9.4.24 (Greenplum Database 6.3.0 build commit:aabd)'}]]
+        self.assertTrue(self.subject.is_platform_gp6_or_up())
+
+    def test_is_platform_gp6_or_up_input_gpdb5(self):
+
+        self.subject.is_platform_pg = Mock(return_value = False)
+
+        self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 8.3.23 (Greenplum Database 5.24.0 build commit:bdca)'}]]
+        self.assertFalse(self.subject.is_platform_gp6_or_up())
+
+    def test_is_platform_gp6_or_up_input_pg(self):
+
+        self.subject.is_platform_pg = Mock(return_value = True)
+
+        self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 10.7'}]]
+        self.assertFalse(self.subject.is_platform_gp6_or_up())
+
+    def test_is_platform_gp6_or_up_input_gpdb7(self):
+
+        self.subject.is_platform_pg = Mock(return_value = False)
+
+        self.plpy_mock_execute.side_effect = [[{ 'version':'PostgreSQL 9.4.24 (Greenplum Database 7.1.0 build commit:aabd)'}]]
+        self.assertTrue(self.subject.is_platform_gp6_or_up())
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 73ff894..fca3a6b 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -1202,7 +1202,7 @@ def rename_table(schema_madlib, orig_name, new_name):
                              format(orig_name))
     return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
                                         new_table_schema, new_table_name)
-
+# ------------------------------------------------------------------------------
 def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
                                  new_table_schema, new_table_name):
     """
@@ -1276,3 +1276,8 @@ def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
         """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}"""
         .format(**locals()))
     return new_name
+# ------------------------------------------------------------------------------
+
+def is_platform_gp6_or_up():
+    version_wrapper = __mad_version()
+    return not is_platform_pg() and not version_wrapper.is_gp_version_less_than('6.0')
diff --git a/src/ports/postgres/modules/utilities/utilities.sql_in b/src/ports/postgres/modules/utilities/utilities.sql_in
index c028661..9e5deee 100644
--- a/src/ports/postgres/modules/utilities/utilities.sql_in
+++ b/src/ports/postgres/modules/utilities/utilities.sql_in
@@ -541,3 +541,36 @@ BEGIN
     RETURN 0;
 END;
 $$ LANGUAGE plpgsql;
+
+-- A few of the gucs like plan_cache_mode and dev_opt_unsafe_truncate_in_subtransaction
+-- are only available in either > pg 11 or > gpdb 6.5. Using this function we
+-- can make sure to run the guc assertion test (assert_guc_value) on the correct
+-- platform versions.
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_ver_greater_than_gp_640_or_pg_11()
+RETURNS BOOLEAN AS $$
+PythonFunctionBodyOnly(utilities, utilities, is_gp_version_less_than)
+    from utilities.utilities import __mad_version
+    from utilities.utilities import is_platform_pg
+    from utilities.utilities import is_pg_major_version_less_than
+    if is_platform_pg:
+        is_pg_major_less_than_12 = is_pg_major_version_less_than(None, 12)
+        return not is_pg_major_less_than_12
+    else:
+        is_ver_less_than_650 = __mad_version().is_gp_version_less_than('6.5.0')
+        return not is_ver_less_than_650
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert_guc_value(
+ guc_name           TEXT,
+ expected_guc_value TEXT)
+RETURNS VOID AS $$
+import plpy
+actual_guc_value = plpy.execute('show {0}'.format(guc_name))[0]
+actual_guc_value = actual_guc_value[guc_name]
+if actual_guc_value != expected_guc_value:
+    plpy.error('guc {0} assertion failed. Expected Value: {1}, '
+               'Actual Value: {2}'.format(guc_name, expected_guc_value, actual_guc_value))
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

[madlib] 05/05: DL: Use unlogged tables for fit multiple

Posted by nk...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 01ede7c82231acb12e6c4b4234fdbb3ab6633b72
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Tue Mar 10 14:48:42 2020 -0700

    DL: Use unlogged tables for fit multiple
    
    GPDB6 supports UNLOGGED tables. From gpdb documentation
    https://gpdb.docs.pivotal.io/6-0/ref_guide/sql_commands/CREATE_TABLE_AS.html
    
    ```
    Data written to unlogged tables is not written to the write-ahead (WAL)
    log, which makes them considerably faster than ordinary tables
    ```
    
    We saw speed improvements because of using unlogged tables on our gcp
    cluster with GPDB6. But, we do not want the final model output table,
    summary table and the info table to be unlogged. This commit ensures for
    all intermediate iterations, except the very final hop in the last
    iteration, the model output table is created as `UNLOGGED` for
    performance. Only for the last hop in the last iteration, it is created
    as a persistent table.
    
    This commit also adds a test to validate model output, summary and info
    tables are persistent(not unlogged) and adds a new utility function to
    check if the gpdb version is 6 and up.
    
    Co-authored-by: Ekta Khanna <ek...@pivotal.io>
---
 .../deep_learning/madlib_keras_fit_multiple_model.py_in | 15 ++++++++++-----
 .../test/madlib_keras_model_selection.sql_in            |  5 +++++
 .../test/madlib_keras_transfer_learning.sql_in          |  6 ++++++
 src/ports/postgres/modules/utilities/utilities.py_in    |  5 +++++
 src/ports/postgres/modules/utilities/utilities.sql_in   | 17 +++++++++++++++++
 5 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index c6352b5..9de9774 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -35,8 +35,9 @@ from madlib_keras_wrapper import *
 from utilities.control import MinWarning
 from utilities.control import OptimizerControl
 from utilities.control import SetGUC
-from utilities.utilities import unique_string
 from utilities.utilities import add_postfix
+from utilities.utilities import is_platform_gp6_or_up
+from utilities.utilities import unique_string
 from utilities.utilities import rotate
 from utilities.utilities import madlib_version
 from utilities.utilities import is_platform_pg
@@ -181,6 +182,7 @@ class FitMultipleModel():
         random.shuffle(self.msts_for_schedule)
         self.grand_schedule = self.generate_schedule(self.msts_for_schedule)
         self.gp_segment_id_col = '0' if is_platform_pg() else GP_SEGMENT_ID_COLNAME
+        self.unlogged_table = "UNLOGGED" if is_platform_gp6_or_up() else ''
 
         if self.warm_start:
             self.create_model_output_table_warm_start()
@@ -224,6 +226,7 @@ class FitMultipleModel():
                 mst_row = [self.grand_schedule[dist_key][mst_idx]
                            for dist_key in self.dist_keys]
                 self.create_mst_schedule_table(mst_row)
+                self.is_final_training_call = (iter == self.num_iterations and mst_idx == total_msts-1)
                 if mst_idx == 0:
                     start_iteration = time.time()
                 self.run_training(mst_idx)
@@ -286,7 +289,7 @@ class FitMultipleModel():
 
     def create_mst_schedule_table(self, mst_row):
         mst_temp_query = """
-                         CREATE TABLE {self.mst_current_schedule_tbl}
+                         CREATE {self.unlogged_table} TABLE {self.mst_current_schedule_tbl}
                                 ({self.model_id_col} INTEGER,
                                  {self.compile_params_col} VARCHAR,
                                  {self.fit_params_col} VARCHAR,
@@ -556,7 +559,7 @@ class FitMultipleModel():
         # that was used for initializing GPU memory.
         use_gpus = self.use_gpus if self.use_gpus else False
         mst_weights_query = """
-            CREATE TABLE {self.mst_weights_tbl} AS
+            CREATE {self.unlogged_table} TABLE {self.mst_weights_tbl} AS
                 SELECT mst_tbl.*, wgh_tbl.{self.model_weights_col},
                        model_arch_tbl.{self.model_arch_col}
                 FROM
@@ -571,7 +574,7 @@ class FitMultipleModel():
         plpy.execute(mst_weights_query)
         use_gpus = self.use_gpus if self.use_gpus else False
         uda_query = """
-            CREATE TABLE {self.weights_to_update_tbl} AS
+            CREATE {self.unlogged_table} TABLE {self.weights_to_update_tbl} AS
             SELECT {self.schema_madlib}.fit_step_multiple_model({mb_dep_var_col},
                 {mb_indep_var_col},
                 {self.dep_shape_col},
@@ -648,9 +651,11 @@ class FitMultipleModel():
 
         with SetGUC("dev_opt_unsafe_truncate_in_subtransaction", "on"):
             temp_model_table = unique_string('updated_model')
+            unlogged_table = self.unlogged_table if not self.is_final_training_call else ''
             plpy.execute("""
-            CREATE TABLE {temp_model_table} ( LIKE {self.model_output_table}
+            CREATE {unlogged_table} TABLE {temp_model_table} ( LIKE {self.model_output_table}
             INCLUDING indexes);""".format(temp_model_table=temp_model_table,
+                                          unlogged_table=unlogged_table,
                                           self=self))
             plpy.execute("""
             INSERT INTO {temp_model_table} SELECT * FROM {self.model_output_table};
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index 391beb7..e1dbe0c 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -210,6 +210,11 @@ SELECT assert(
         'Keras Fit Multiple Output Summary Validation failed when user passes in 1-hot encoded label vector. Actual:' || __to_char(summary))
 FROM (SELECT * FROM iris_multiple_model_summary) summary;
 
+-- Test the output table created are all persistent(not unlogged)
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model') = false, 'Model output table is unlogged');
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model_summary') = false, 'Model summary output table is unlogged');
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model_info') = false, 'Model info output table is unlogged');
+
 -- Test when number of configs(3) equals number of segments(3)
 DROP TABLE IF EXISTS iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
 SELECT setseed(0);
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
index 4334fa6..43ad38b 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
@@ -173,6 +173,12 @@ SELECT madlib_keras_fit_multiple_model(
   TRUE -- warm_start
 );
 
+-- Test that when warm_start is TRUE, all the output tables are persistent(not unlogged)
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model') = false, 'Model output table is unlogged');
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model_summary') = false, 'Model summary output table is unlogged');
+SELECT assert(MADLIB_SCHEMA.is_table_unlogged('iris_multiple_model_info') = false, 'Model info output table is unlogged');
+
+
 SELECT assert(
   array_upper(training_loss, 1) = 3 AND
   array_upper(training_metrics, 1) = 3,
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index fca3a6b..577235e 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -50,6 +50,11 @@ def has_function_properties():
 def is_platform_pg():
     """ __POSTGRESQL__ variable defined during configure """
     return m4_ifdef(<!__POSTGRESQL__!>, <!True!>, <!False!>)
+
+def is_platform_gp6_or_up():
+    version_wrapper = __mad_version()
+    return not is_platform_pg() and not version_wrapper.is_gp_version_less_than('6.0')
+
 # ------------------------------------------------------------------------------
 
 
diff --git a/src/ports/postgres/modules/utilities/utilities.sql_in b/src/ports/postgres/modules/utilities/utilities.sql_in
index 9e5deee..bbf861d 100644
--- a/src/ports/postgres/modules/utilities/utilities.sql_in
+++ b/src/ports/postgres/modules/utilities/utilities.sql_in
@@ -574,3 +574,20 @@ if actual_guc_value != expected_guc_value:
                'Actual Value: {2}'.format(guc_name, expected_guc_value, actual_guc_value))
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_table_unlogged(
+  tbl_name TEXT
+)
+RETURNS boolean AS $$
+DECLARE
+ret_val   boolean;
+BEGIN
+    BEGIN
+        SELECT relpersistence='u' INTO ret_val FROM pg_class WHERE relname = tbl_name;
+    EXCEPTION
+        WHEN OTHERS THEN
+            RETURN false; -- Traps exception and returns false, for GP versions < 6 as relpersistence is not a col in pg_class
+    END;
+    RETURN ret_val;
+END;
+$$ LANGUAGE plpgsql;

[madlib] 03/05: DL: Fix disk issue by using truncate guc

Posted by nk...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 4f62f4b1772ac8f46b3bbdfba5986ce7280b2da8
Author: Ekta Khanna <ek...@pivotal.io>
AuthorDate: Fri Feb 14 02:02:56 2020 +0000

    DL: Fix disk issue by using truncate guc
    
    JIRA: MADLIB-1406
    
    While testing places10 with fit multiple (gpdb5 and gpdb6, 10 iterations and 20 msts), we ran out of disk space although we had at least 1.5T left at the beginning of the query.
    
    The main contributor to the disk bloat is the update statement that we run at the end of each hop
    
    ```
    UPDATE {self.model_output_table}
    SET {self.model_weights_col} = {self.weights_to_update_tbl}.{self.model_weights_col}
    FROM {self.weights_to_update_tbl}
    WHERE {self.model_output_table}.{self.mst_key_col} = {self.weights_to_update_tbl}.{self.mst_key_col}
    ```
    
    In postgres/gpdb, every update command is really two commands i.e. insert and then delete. Because of this, the actual space is not freed and only gets freed when vacuum is run consistently or vacuum full is run at the end. We verified this by printing the {self.model_output_table} size for each mst_key and it kept on growing with each update statement.
    
    Also the disk space for other intermediate tables that get created in the run_training function never gets cleared even though we drop these tables inside the said function. This is because drop/truncate does not release disk space inside a pl function since it's in a sub transaction (this is so that it can rollback). It only releases the space once the pl function has completed execution.
    
    The only way to make this work was to add a truncate statement and change the gpdb code to do a truncate inside a sub transaction. gpdb 6.5 introduced a guc https://github.com/greenplum-db/gpdb/commit/b4692794a0abd8d8051c23019d26e22f7f3d0aa5 which when turned 'on' allows for truncating the disk space inside a sub transaction.
    
    Note that this guc is only available in gpdb 6.5 and up.
    
    run_training workflow (this function is called per hop)
    1. join schedule table with mst_weights table to do the hop
    2. Call the uda and copy the output to an intermediate table
    3. Update the model table with the results of the previous step
    4. set the truncate guc to on
    5. Create temp table from model table
    6. truncate the model table to release disk space
    7. rename temp table to model table so that it can be reused for the next hop
    
    Warm Start:
    For warm start, we can't keep calling truncate on the user passed output table
    because then we won't be able to roll it back in case of a failure. So for warm
    start, we create a copy of the output table passed by the user and then operate
    on the copied table. At the end, we drop the original output table and rename
    the copied table to the original table name.
    
    Co-authored-by: Ekta Khanna <ek...@pivotal.io>
---
 .../madlib_keras_fit_multiple_model.py_in          | 241 +++++++++++++++------
 .../madlib_keras_fit_multiple_model.sql_in         |   7 +
 .../test/madlib_keras_model_selection.sql_in       |  30 +++
 .../test/madlib_keras_model_selection_e2e.sql_in   |  21 +-
 .../test/madlib_keras_transfer_learning.sql_in     |  71 +++---
 src/ports/postgres/modules/utilities/control.py_in |   3 +-
 .../utilities/test/unit_tests/test_control.py_in   |  12 +-
 7 files changed, 276 insertions(+), 109 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index a32421b..c6352b5 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -34,6 +34,7 @@ from madlib_keras_wrapper import *
 
 from utilities.control import MinWarning
 from utilities.control import OptimizerControl
+from utilities.control import SetGUC
 from utilities.utilities import unique_string
 from utilities.utilities import add_postfix
 from utilities.utilities import rotate
@@ -41,6 +42,8 @@ from utilities.utilities import madlib_version
 from utilities.utilities import is_platform_pg
 from utilities.utilities import get_seg_number
 from utilities.utilities import get_segments_per_host
+from utilities.utilities import rename_table
+
 import json
 from collections import defaultdict
 import random
@@ -88,11 +91,7 @@ class FitMultipleModel():
         self.model_selection_table = model_selection_table
         if self.model_selection_table:
             self.model_selection_summary_table = add_postfix(self.model_selection_table, '_summary')
-        self.model_output_table = model_output_table
-        if self.model_output_table:
-            self.model_info_table = add_postfix(model_output_table, '_info')
-            self.model_summary_table = add_postfix(
-                model_output_table, '_summary')
+
         self.num_iterations = num_iterations
         self.metrics_compute_frequency = metrics_compute_frequency
         self.name = name
@@ -105,8 +104,8 @@ class FitMultipleModel():
         self.compile_params_col = ModelSelectionSchema.COMPILE_PARAMS
         self.fit_params_col = ModelSelectionSchema.FIT_PARAMS
         self.model_arch_table_col = ModelSelectionSchema.MODEL_ARCH_TABLE
-        self.model_weights_col=ModelArchSchema.MODEL_WEIGHTS
-        self.model_arch_col=ModelArchSchema.MODEL_ARCH
+        self.model_weights_col = ModelArchSchema.MODEL_WEIGHTS
+        self.model_arch_col = ModelArchSchema.MODEL_ARCH
         self.train_mst_metric_eval_time = defaultdict(list)
         self.train_mst_loss = defaultdict(list)
         self.train_mst_metric = defaultdict(list)
@@ -116,21 +115,41 @@ class FitMultipleModel():
         self.use_gpus = use_gpus
         self.segments_per_host = get_segments_per_host()
         if self.use_gpus:
-            self.accessible_gpus_for_seg = get_accessible_gpus_for_seg(self.schema_madlib,
-                self.segments_per_host, self.module_name)
+            self.accessible_gpus_for_seg = get_accessible_gpus_for_seg(
+                self.schema_madlib, self.segments_per_host, self.module_name)
         else:
             self.accessible_gpus_for_seg = get_seg_number()*[0]
 
+        self.original_model_output_table = model_output_table
+        if self.original_model_output_table:
+            self.model_info_table = add_postfix(self.original_model_output_table, '_info')
+            self.model_summary_table = add_postfix(
+                self.original_model_output_table, '_summary')
+
+        self.model_output_table = self.original_model_output_table
+
+        """
+        For warm start, we need to copy the model output table to a temp table
+        because we call truncate on the model output table while training.
+        If the query gets aborted, we need to make sure that the user passed
+        model output table can be recovered.
+        """
+        self.warm_start = bool(warm_start)
+        self.warm_start_msts = []
+        if self.warm_start:
+            self.model_output_table = unique_string('initial_model')
+
         self.fit_validator_train = FitMultipleInputValidator(
-            self.source_table, self.validation_table, self.model_output_table,
+            self.source_table, self.validation_table, self.original_model_output_table,
             self.model_selection_table, self.model_selection_summary_table,
             mb_dep_var_col, mb_indep_var_col, self.num_iterations,
             self.model_info_table, self.mst_key_col, self.model_arch_table_col,
-            self.metrics_compute_frequency, warm_start, self.use_gpus,
+            self.metrics_compute_frequency, self.warm_start, self.use_gpus,
             self.accessible_gpus_for_seg)
         if self.metrics_compute_frequency is None:
             self.metrics_compute_frequency = num_iterations
-        self.warm_start = bool(warm_start)
+
+
         self.msts = self.fit_validator_train.msts
         self.model_arch_table = self.fit_validator_train.model_arch_table
         self.metrics_iters = []
@@ -156,16 +175,26 @@ class FitMultipleModel():
         self.dist_keys = query_dist_keys(self.source_table, dist_key_col)
         if len(self.msts) < len(self.dist_keys):
             self.msts_for_schedule = self.msts + [None] * \
-                (len(self.dist_keys) - len(self.msts))
+                                     (len(self.dist_keys) - len(self.msts))
         else:
             self.msts_for_schedule = self.msts
         random.shuffle(self.msts_for_schedule)
         self.grand_schedule = self.generate_schedule(self.msts_for_schedule)
         self.gp_segment_id_col = '0' if is_platform_pg() else GP_SEGMENT_ID_COLNAME
 
-        self.create_model_output_table()
+        if self.warm_start:
+            self.create_model_output_table_warm_start()
+        else:
+            self.create_model_output_table()
+
         self.weights_to_update_tbl = unique_string(desp='weights_to_update')
         self.fit_multiple_model()
+
+        # Update and cleanup metadata tables
+        self.insert_info_table()
+        self.create_model_summary_table()
+        if self.warm_start:
+            self.cleanup_for_warm_start()
         reset_cuda_env(original_cuda_env)
 
     def fit_multiple_model(self):
@@ -175,8 +204,18 @@ class FitMultipleModel():
             self.metrics_elapsed_start_time = time.time()
             self.train_multiple_model()
             self.end_training_time = datetime.datetime.now()
-            self.insert_info_table()
-            self.create_model_summary_table()
+
+    def cleanup_for_warm_start(self):
+        """
+        1. drop original model table
+        2. rename temp to original
+        :return:
+        """
+        drop_query = "DROP TABLE IF EXISTS {}".format(
+            self.original_model_output_table)
+        plpy.execute(drop_query)
+        rename_table(self.schema_madlib, self.model_output_table,
+                     self.original_model_output_table)
 
     def train_multiple_model(self):
         total_msts = len(self.msts_for_schedule)
@@ -187,11 +226,13 @@ class FitMultipleModel():
                 self.create_mst_schedule_table(mst_row)
                 if mst_idx == 0:
                     start_iteration = time.time()
-                self.run_training()
+                self.run_training(mst_idx)
                 if mst_idx == (total_msts - 1):
                     end_iteration = time.time()
-                    self.info_str = "\tTime for training in iteration {0}: {1} sec\n".format(iter,
-                                        end_iteration - start_iteration)
+                    self.info_str = "\tTime for training in iteration " \
+                                    "{0}: {1} sec\n".format(iter,
+                                                            end_iteration -
+                                                            start_iteration)
             if should_compute_metrics_this_iter(iter,
                                                 self.metrics_compute_frequency,
                                                 self.num_iterations):
@@ -218,7 +259,7 @@ class FitMultipleModel():
             self.info_str += "\n\tValidation set after iteration {0}:".format(epoch)
         for mst in self.msts:
             weights = query_weights(self.model_output_table, self.model_weights_col,
-                self.mst_key_col, mst[self.mst_key_col])
+                                    self.mst_key_col, mst[self.mst_key_col])
             model_arch, _ = get_model_arch_weights(self.model_arch_table, mst[self.model_id_col])
             _, metric, loss = compute_loss_and_metrics(
                 self.schema_madlib, table, "$madlib${0}$madlib$".format(
@@ -274,27 +315,44 @@ class FitMultipleModel():
                                 """.format(**locals())
             plpy.execute(mst_insert_query)
 
+
     def create_model_output_table(self):
-        warm_start_msts = []
-        if self.warm_start:
-            plpy.execute(""" DELETE FROM {self.model_output_table}
+        output_table_create_query = """
+                                    CREATE TABLE {self.model_output_table}
+                                    ({self.mst_key_col} INTEGER PRIMARY KEY,
+                                     {self.model_weights_col} BYTEA,
+                                     {self.model_arch_col} JSON)
+                                    """.format(self=self)
+        plpy.execute(output_table_create_query)
+        self.initialize_model_output_and_info()
+
+    def create_model_output_table_warm_start(self):
+        """
+        For warm start, we need to copy the model output table to a temp table
+        because we call truncate on the model output table while training.
+        If the query gets aborted, we need to make sure that the user passed
+        model output table can be recovered.
+        """
+        plpy.execute("""
+            CREATE TABLE {self.model_output_table} (
+            LIKE {self.original_model_output_table} INCLUDING indexes);
+            """.format(self=self))
+
+        plpy.execute("""INSERT INTO {self.model_output_table}
+            SELECT * FROM {self.original_model_output_table};
+            """.format(self=self))
+
+        plpy.execute(""" DELETE FROM {self.model_output_table}
                 WHERE {self.mst_key_col} NOT IN (
                     SELECT {self.mst_key_col} FROM {self.model_selection_table})
                 """.format(self=self))
-            warm_start_msts = plpy.execute(
-                """ SELECT array_agg({0}) AS a FROM {1}
-                """.format(self.mst_key_col, self.model_output_table))[0]['a']
-            plpy.execute("DROP TABLE {0}".format(self.model_info_table))
-
-        else:
-            output_table_create_query = """
-                                        CREATE TABLE {self.model_output_table}
-                                        ({self.mst_key_col} INTEGER PRIMARY KEY,
-                                         {self.model_weights_col} BYTEA,
-                                         {self.model_arch_col} JSON)
-                                        """.format(self=self)
-            plpy.execute(output_table_create_query)
+        self.warm_start_msts = plpy.execute(
+            """ SELECT array_agg({0}) AS a FROM {1}
+            """.format(self.mst_key_col, self.model_output_table))[0]['a']
+        plpy.execute("DROP TABLE {0}".format(self.model_info_table))
+        self.initialize_model_output_and_info()
 
+    def initialize_model_output_and_info(self):
         info_table_create_query = """
                                   CREATE TABLE {self.model_info_table}
                                   ({self.mst_key_col} INTEGER PRIMARY KEY,
@@ -313,7 +371,7 @@ class FitMultipleModel():
                                    validation_loss_final DOUBLE PRECISION,
                                    validation_metrics DOUBLE PRECISION[],
                                    validation_loss DOUBLE PRECISION[])
-                               """.format(self=self)
+                                       """.format(self=self)
 
         plpy.execute(info_table_create_query)
         for mst in self.msts:
@@ -322,23 +380,23 @@ class FitMultipleModel():
 
 
             # If warm start is enabled, weights from transfer learning cannot be
-            # used, even if a particular model doesn't have warm start weigths.
+            # used, even if a particular model doesn't have warm start weights.
             if self.warm_start:
                 model_weights = None
                 mst_filter = """
-                    WHERE {mst_col}={mst_key}
-                """.format(
-                        mst_col=self.mst_key_col,
-                        mst_key=mst['mst_key']
-                    )
- 
+                            WHERE {mst_col}={mst_key}
+                        """.format(
+                    mst_col=self.mst_key_col,
+                    mst_key=mst['mst_key']
+                )
+
             else:
                 mst_filter = ''
 
             serialized_weights = get_initial_weights(self.model_output_table,
                                                      model_arch,
                                                      model_weights,
-                                                     mst['mst_key'] in warm_start_msts,
+                                                     mst['mst_key'] in self.warm_start_msts,
                                                      self.use_gpus,
                                                      self.accessible_gpus_for_seg,
                                                      mst_filter
@@ -352,25 +410,25 @@ class FitMultipleModel():
                 metrics_list) if is_metrics_specified else 'NULL'
 
             info_table_insert_query = """
-                    INSERT INTO {self.model_info_table}({self.mst_key_col},
-                                {self.model_id_col}, {self.compile_params_col},
-                                {self.fit_params_col}, model_type, model_size,
-                                metrics_type)
-                        VALUES ({mst_key_val}, {model_id},
-                                $madlib${compile_params}$madlib$,
-                                $madlib${fit_params}$madlib$, '{model_type}',
-                                {model_size}, {metrics_type})
-                """.format(self=self,
-                           mst_key_val=mst[self.mst_key_col],
-                           model_id=mst[self.model_id_col],
-                           compile_params=mst[self.compile_params_col],
-                           fit_params=mst[self.fit_params_col],
-                           model_type='madlib_keras',
-                           model_size=model_size,
-                           metrics_type=metrics_type)
+                            INSERT INTO {self.model_info_table}({self.mst_key_col},
+                                        {self.model_id_col}, {self.compile_params_col},
+                                        {self.fit_params_col}, model_type, model_size,
+                                        metrics_type)
+                                VALUES ({mst_key_val}, {model_id},
+                                        $madlib${compile_params}$madlib$,
+                                        $madlib${fit_params}$madlib$, '{model_type}',
+                                        {model_size}, {metrics_type})
+                        """.format(self=self,
+                                   mst_key_val=mst[self.mst_key_col],
+                                   model_id=mst[self.model_id_col],
+                                   compile_params=mst[self.compile_params_col],
+                                   fit_params=mst[self.fit_params_col],
+                                   model_type='madlib_keras',
+                                   model_size=model_size,
+                                   metrics_type=metrics_type)
             plpy.execute(info_table_insert_query)
 
-            if not mst['mst_key'] in warm_start_msts:
+            if not mst['mst_key'] in self.warm_start_msts:
                 output_table_insert_query = """
                                     INSERT INTO {self.model_output_table}(
                                         {self.mst_key_col}, {self.model_weights_col},
@@ -484,7 +542,7 @@ class FitMultipleModel():
             if self.validation_table:
                 self.update_info_table(mst, False)
 
-    def run_training(self):
+    def run_training(self, mst_idx):
         # NOTE: In the DL module, we want to avoid CREATING TEMP tables
         # (creates a slice which stays until the session is disconnected)
         # or minimize writing queries that generate plans with Motions (creating
@@ -496,6 +554,7 @@ class FitMultipleModel():
         # Therefore we want to have queries that do not add motions and all the
         # sub-queries running Keras/tensorflow operations reuse the same slice(process)
         # that was used for initializing GPU memory.
+        use_gpus = self.use_gpus if self.use_gpus else False
         mst_weights_query = """
             CREATE TABLE {self.mst_weights_tbl} AS
                 SELECT mst_tbl.*, wgh_tbl.{self.model_weights_col},
@@ -553,7 +612,55 @@ class FitMultipleModel():
             WHERE {self.model_output_table}.{self.mst_key_col} = {self.weights_to_update_tbl}.{self.mst_key_col}
         """.format(self=self)
         plpy.execute(update_query)
-        plpy.execute("DROP TABLE IF EXISTS {0}, {1}, {2}".format(
-                                                        self.mst_weights_tbl,
-                                                        self.mst_current_schedule_tbl,
-                                                        self.weights_to_update_tbl))
+
+        self.truncate_and_drop_tables()
+
+    def truncate_and_drop_tables(self):
+        """
+        Context: UPDATE statements in postgres are not in-place replacements but
+        the row to be updated is marked for deletion(note that the disk space for
+        this row doesn't get released until vaccuum is called) and a new row in
+        inserted.
+
+        This function will clear out the disk space used by the model_output_table
+        and also drop all the other intermediate tables.
+        If available, set the `` guc so that the truncate command can release the
+        disk space. The disk space will be released immediately and hence the
+        model_output table won't grow in size with each UPDATE statement.
+
+        Without this guc, the disk space won't be released and each
+        call to the UPDATE statement will keep adding to the disk space. The disk
+        space will only be released when the query is completed.
+
+        The guc can cause data loss if not used properly. Since truncate will
+        actually clear the disk space immediately, there is no way to recover to
+        the state before truncate was called on that table. So this guc should only
+        be set for intermediate tables and never for tables created outside the
+        scope of the fit_multiple udf.
+
+        Workflow
+        1. Create temp table from model table (including the indexes)
+        2. truncate the model table to release disk space
+        3. rename temp table to model table so that it can be reused for the next
+        hop
+        :return:
+        """
+
+        with SetGUC("dev_opt_unsafe_truncate_in_subtransaction", "on"):
+            temp_model_table = unique_string('updated_model')
+            plpy.execute("""
+            CREATE TABLE {temp_model_table} ( LIKE {self.model_output_table}
+            INCLUDING indexes);""".format(temp_model_table=temp_model_table,
+                                          self=self))
+            plpy.execute("""
+            INSERT INTO {temp_model_table} SELECT * FROM {self.model_output_table};
+            TRUNCATE TABLE {self.model_output_table};
+            DROP TABLE {self.model_output_table};
+            """.format(temp_model_table=temp_model_table, self=self))
+            rename_table(self.schema_madlib, temp_model_table,
+                         self.model_output_table)
+            plpy.execute("""
+            TRUNCATE TABLE {self.mst_weights_tbl}, {self.mst_current_schedule_tbl},
+            {self.weights_to_update_tbl};
+            DROP TABLE IF EXISTS {self.mst_weights_tbl}, {self.mst_current_schedule_tbl},
+            {self.weights_to_update_tbl};""".format(self=self))
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
index df79d86..cd58d93 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.sql_in
@@ -88,6 +88,12 @@ You can set up the models and hyperparameters to try with the
 Model Selection</a> utility to define the unique combinations
 of model architectures, compile and fit parameters.
 
+@note If 'madlib_keras_fit_multiple_model()' is running on GPDB 5, the database will
+keep adding to the disk space (in proportion to model size) and will only
+release the disk space once the fit multiple query has completed execution.
+This is not the case for GPDB 6+ where disk space is released during the
+fit multiple query.
+
 @anchor keras_fit
 @par Fit
 The fit (training) function has the following format:
@@ -120,6 +126,7 @@ madlib_keras_fit_multiple_model(
   <dt>model_output_table</dt>
   <dd>TEXT. Name of the output table containing the
   multiple models created.
+  @note pg_temp is not allowed as an output table schema for fit multiple.
   Details of output tables are shown below.
   </dd>
 
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index ddf2e0f..d101135 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -346,6 +346,11 @@ SELECT madlib_keras_fit_multiple_model(
 	FALSE
 );
 
+-- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off'
+-- but we change it to 'on' in fit_multiple.py. Assert that the value is
+-- reset after calling fit_multiple
+select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off');
+
 SELECT assert(COUNT(*)=4, 'Info table must have exactly same rows as the number of msts.')
 FROM iris_multiple_model_info;
 
@@ -388,4 +393,29 @@ SELECT assert(
         'Keras Fit Multiple num_clases and class values Validation failed. Actual:' || __to_char(summary))
 FROM (SELECT * FROM iris_multiple_model_summary) summary;
 
+------------- Test for schema qualified output table and input table -----------------------
+CREATE SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__;
+CREATE TABLE __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_data_packed as select * from iris_data_packed;
+CREATE TABLE __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_data_packed_summary as select * from iris_data_packed_summary;
+
+-- do not drop the output table created in the previous test
+SELECT count(*) from iris_multiple_model;
+SELECT madlib_keras_fit_multiple_model(
+	'__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_data_packed',
+	'__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model',
+	'mst_table_1row',
+	1,
+	FALSE,
+	NULL,
+	1,
+	FALSE
+);
+SELECT count(*) from __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model;
+SELECT assert(
+        num_classes = 3 AND
+        class_values = '{Iris-setosa,Iris-versicolor,NULL}',
+        'Keras Fit Multiple validation failed. Actual:' || __to_char(summary))
+FROM (SELECT * FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model_summary) summary;
+
+DROP SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__ CASCADE;
 !>)
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
index 43f08d0..818a013 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection_e2e.sql_in
@@ -45,12 +45,13 @@ SELECT load_model_selection_table(
     ]
 );
 
-DROP TABLE if exists pg_temp.iris_multiple_model,
-                     pg_temp.iris_multiple_model_summary,
-                     pg_temp.iris_multiple_model_info;
+CREATE SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__;
+DROP TABLE if exists __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model,
+                     __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model_summary,
+                     __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model_info;
 SELECT madlib_keras_fit_multiple_model(
 	'iris_data_packed',
-	'pg_temp.iris_multiple_model',
+	'__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model',
 	'pg_temp.mst_table',
 	3,
 	FALSE
@@ -59,9 +60,9 @@ SELECT madlib_keras_fit_multiple_model(
 SELECT assert(
         model_arch_table = 'iris_model_arch' AND
         validation_table is NULL AND
-        model_info = 'pg_temp.iris_multiple_model_info' AND
+        model_info = '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model_info' AND
         source_table = 'iris_data_packed' AND
-        model = 'pg_temp.iris_multiple_model' AND
+        model = '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model' AND
         dependent_varname = 'class_text' AND
         independent_varname = 'attributes' AND
         madlib_version is NOT NULL AND
@@ -73,12 +74,12 @@ SELECT assert(
         dependent_vartype LIKE '%char%' AND
         normalizing_const = 1,
         'Keras Fit Multiple Output Summary Validation failed. Actual:' || __to_char(summary))
-FROM (SELECT * FROM pg_temp.iris_multiple_model_summary) summary;
+FROM (SELECT * FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model_summary) summary;
 
 -- Run Predict
 DROP TABLE IF EXISTS pg_temp.iris_predict;
 SELECT madlib_keras_predict(
-    'pg_temp.iris_multiple_model',
+    '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model',
     'iris_data',
     'id',
     'attributes',
@@ -90,7 +91,7 @@ SELECT madlib_keras_predict(
 -- Run Evaluate
 DROP TABLE IF EXISTS pg_temp.evaluate_out;
 SELECT madlib_keras_evaluate(
-    'pg_temp.iris_multiple_model',
+    '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_multiple_model',
     'iris_data_val',
     'pg_temp.evaluate_out',
     NULL,
@@ -155,4 +156,6 @@ SELECT assert(loss >= 0 AND
         metric >= 0 AND
         metrics_type = '{accuracy}', 'Evaluate output validation failed.  Actual:' || __to_char(evaluate_out))
 FROM evaluate_out;
+
+DROP SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__ CASCADE;
 !>)
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
index c73fc74..c5c8a93 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_transfer_learning.sql_in
@@ -25,10 +25,11 @@ m4_include(`SQLCommon.m4')
              `\(.*\)libmadlib\.so',
              `\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
 )
-
-DROP TABLE IF EXISTS pg_temp.iris_model, pg_temp.iris_model_summary;
+-------------- Warm start test (along with schema qualified output table) -------------------------
+CREATE SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__;
+DROP TABLE IF EXISTS iris_model, iris_model_summary;
 SELECT madlib_keras_fit('iris_data_packed',   -- source table
-                        'pg_temp.iris_model',          -- model output table
+                        '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model',          -- model output table
                         'iris_model_arch',  -- model arch table
                          1,                    -- model arch id
                          $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$,  -- compile_params
@@ -45,43 +46,44 @@ SELECT assert(
   array_upper(training_loss, 1) = 5 AND
   array_upper(training_metrics, 1) = 5,
   'metrics compute frequency must be 1.')
-FROM pg_temp.iris_model_summary;
+FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model_summary;
 
 SELECT assert(
   training_loss[5]-training_loss[1] < 0.1 AND
   training_metrics[5]-training_metrics[1] > -0.1,
     'The loss and accuracy should have improved with more iterations.'
 )
-FROM pg_temp.iris_model_summary;
+FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model_summary;
 
 -- Make a copy of the loss and metrics array, to compare it with runs after
 -- warm start and transfer learning.
 DROP TABLE IF EXISTS iris_model_first_run;
 CREATE TABLE iris_model_first_run AS
 SELECT training_loss_final, training_metrics_final
-FROM pg_temp.iris_model_summary;
+FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model_summary;
 
 -- Copy weights that were learnt from the previous run, for transfer
 -- learning. Copy it now, because using warm_start will overwrite it.
-UPDATE iris_model_arch set model_weights = (select model_weights from iris_model)  WHERE model_id = 2;
+UPDATE iris_model_arch set model_weights = (select model_weights from __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model)
+WHERE model_id = 2;
 
 -- Warm start test
 SELECT madlib_keras_fit('iris_data_packed',   -- source table
-                       'pg_temp.iris_model',          -- model output table
+                       '__MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model',          -- model output table
                        'iris_model_arch',  -- model arch table
                         2,                    -- model arch id
                         $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$,  -- compile_params
                         $$ batch_size=5, epochs=3 $$,  -- fit_params
                         2,                    -- num_iterations,
                         NULL, NULL, 1,
-                        true -- warm start
+                        TRUE -- warm start
                       );
 
 SELECT assert(
   array_upper(training_loss, 1) = 2 AND
   array_upper(training_metrics, 1) = 2,
   'metrics compute frequency must be 1.')
-FROM pg_temp.iris_model_summary;
+FROM __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model_summary;
 
 SELECT assert(
   abs(first.training_loss_final-second.training_loss[1]) < 1e-6 AND
@@ -89,12 +91,16 @@ SELECT assert(
   abs(first.training_metrics_final-second.training_metrics[1]) < 1e-10 AND
   abs(first.training_metrics_final-second.training_metrics[2]) < 1e-10,
   'warm start test failed because training loss and metrics don''t match the expected value from the previous run of keras fit.')
-FROM iris_model_first_run AS first, pg_temp.iris_model_summary AS second;
+FROM iris_model_first_run AS first, __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__.iris_model_summary AS second;
+
+DROP SCHEMA __MADLIB__DEEP_LEARNING_SCHEMA__MADLIB__ CASCADE;
 
--- Transfer learning test
+---------------- end Warm start test ----------------------------------------------------
+
+---------------- Transfer learning test ----------------------------------------------------
 DROP TABLE IF EXISTS iris_model_transfer, iris_model_transfer_summary;
 SELECT madlib_keras_fit('iris_data_packed',   -- source table
-                       'pg_temp.iris_model_transfer',          -- model output table
+                       'iris_model_transfer',          -- model output table
                        'iris_model_arch',  -- model arch table
                         2,                    -- model arch id
                         $$ loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'] $$,  -- compile_params
@@ -107,7 +113,7 @@ SELECT assert(
   array_upper(training_loss, 1) = 2 AND
   array_upper(training_metrics, 1) = 2,
   'metrics compute frequency must be 1.')
-FROM pg_temp.iris_model_transfer_summary;
+FROM iris_model_transfer_summary;
 
 SELECT assert(
   abs(first.training_loss_final-second.training_loss[1]) < 1e-6 AND
@@ -115,7 +121,8 @@ SELECT assert(
   abs(first.training_metrics_final-second.training_metrics[1]) < 1e-10 AND
   abs(first.training_metrics_final-second.training_metrics[2]) < 1e-10,
   'Transfer learning test failed because training loss and metrics don''t match the expected value.')
-FROM iris_model_first_run AS first, iris_model_transfer_summary AS second;
+FROM iris_model_first_run AS first,
+iris_model_transfer_summary AS second;
 
 -- Rerun the iris setup to discard the changes
 \i m4_regexp(MODULE_PATHNAME,
@@ -139,11 +146,11 @@ SELECT load_model_selection_table(
     ]
 );
 
-DROP TABLE if exists pg_temp.iris_multiple_model, pg_temp.iris_multiple_model_summary, pg_temp.iris_multiple_model_info;
+DROP TABLE if exists iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
 SELECT setseed(0);
 SELECT madlib_keras_fit_multiple_model(
   'iris_data_packed',
-  'pg_temp.iris_multiple_model',
+  'iris_multiple_model',
   'mst_table',
   3,
   FALSE, NULL, 1
@@ -153,12 +160,12 @@ DROP TABLE IF EXISTS iris_model_first_run;
 CREATE TABLE iris_model_first_run AS
 SELECT mst_key, model_id, training_loss, training_metrics,
     training_loss_final, training_metrics_final
-FROM pg_temp.iris_multiple_model_info;
+FROM iris_multiple_model_info;
 
 -- warm start for fit multiple model
 SELECT madlib_keras_fit_multiple_model(
   'iris_data_packed',
-  'pg_temp.iris_multiple_model',
+  'iris_multiple_model',
   'mst_table',
   3,
   FALSE,
@@ -170,7 +177,7 @@ SELECT assert(
   array_upper(training_loss, 1) = 3 AND
   array_upper(training_metrics, 1) = 3,
   'metrics compute frequency must be 1.')
-FROM pg_temp.iris_multiple_model_info;
+FROM iris_multiple_model_info;
 
 SELECT assert(
   abs(first.training_loss_final-second.training_loss[1]) < 1e-6 AND
@@ -178,7 +185,7 @@ SELECT assert(
   abs(first.training_metrics_final-second.training_metrics[1]) < 1e-10 AND
   abs(first.training_metrics_final-second.training_metrics[2]) < 1e-10,
   'warm start test failed because training loss and metrics don''t match the expected value from the previous run of keras fit.')
-FROM iris_model_first_run AS first, pg_temp.iris_multiple_model_info AS second
+FROM iris_model_first_run AS first, iris_multiple_model_info AS second
 WHERE first.mst_key = second.mst_key AND first.model_id = 2;
 
 -- warm start with different mst tables
@@ -244,6 +251,11 @@ SELECT madlib_keras_fit_multiple_model(
   NULL, 1,
   TRUE -- warm_start
 );
+-- The default value of the guc 'dev_opt_unsafe_truncate_in_subtransaction' is 'off'
+-- but we change it to 'on' in fit_multiple.py. Assert that the value is
+-- reset after calling fit_multiple
+select assert_guc_value('dev_opt_unsafe_truncate_in_subtransaction', 'off');
+
 
 SELECT assert(
   5 IN (SELECT mst_key FROM iris_multiple_model),
@@ -298,30 +310,31 @@ SELECT load_model_selection_table(
     ]
 );
 
-DROP TABLE if exists pg_temp.iris_multiple_model, pg_temp.iris_multiple_model_summary, pg_temp.iris_multiple_model_info;
+-- TODO we need to drop iris_multiple_model as well as iris_multiple_model
+DROP TABLE if exists iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
 SELECT setseed(0);
 SELECT madlib_keras_fit_multiple_model(
   'iris_data_packed',
-  'pg_temp.iris_multiple_model',
+  'iris_multiple_model',
   'mst_table',
   3,
   FALSE, NULL, 1
 );
 
 UPDATE iris_model_arch
-SET model_weights = (SELECT model_weights FROM pg_temp.iris_multiple_model WHERE mst_key=1)
+SET model_weights = (SELECT model_weights FROM iris_multiple_model WHERE mst_key=1)
 WHERE model_id = 1;
 
 DROP TABLE IF EXISTS iris_model_first_run;
 CREATE TABLE iris_model_first_run AS
 SELECT mst_key, model_id, training_loss, training_metrics,
     training_loss_final, training_metrics_final
-FROM pg_temp.iris_multiple_model_info;
+FROM iris_multiple_model_info;
 
-DROP TABLE if exists pg_temp.iris_multiple_model, pg_temp.iris_multiple_model_summary, pg_temp.iris_multiple_model_info;
+DROP TABLE if exists iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
 SELECT madlib_keras_fit_multiple_model(
   'iris_data_packed',
-  'pg_temp.iris_multiple_model',
+  'iris_multiple_model',
   'mst_table',
   3,
   FALSE, NULL, 1
@@ -330,6 +343,8 @@ SELECT madlib_keras_fit_multiple_model(
 SELECT assert(
   (first.training_loss_final-second.training_loss_final) > 1e-6,
   'Transfer learning test failed because training loss and metrics don''t match the expected value.')
-FROM iris_model_first_run AS first, pg_temp.iris_multiple_model_info AS second
+FROM iris_model_first_run AS first, iris_multiple_model_info AS second
 WHERE first.mst_key = second.mst_key AND first.model_id = 1;
+
 !>)
+
diff --git a/src/ports/postgres/modules/utilities/control.py_in b/src/ports/postgres/modules/utilities/control.py_in
index b52a881..32f2f59 100644
--- a/src/ports/postgres/modules/utilities/control.py_in
+++ b/src/ports/postgres/modules/utilities/control.py_in
@@ -74,7 +74,8 @@ class SetGUC(ContextDecorator):
                 return self
 
             if self.new_guc_value:
-                plpy.execute("set {0}={1}".format(self.guc_name, self.new_guc_value))
+                plpy.execute("set {0}='{1}'".format(self.guc_name,
+                                                    self.new_guc_value))
             else:
                 if self.error_on_fail:
                     plpy.error("Cannot set {0} to None. Please provide a valid value"
diff --git a/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in b/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in
index 2d43968..5e76f78 100644
--- a/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in
+++ b/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in
@@ -94,18 +94,22 @@ class SetGUCTestCase(unittest.TestCase):
         self.module_patcher.stop()
 
     def test_set_guc_sets_new_value(self):
-        self.plpy_mock_execute.return_value = [{'foo': 'new_bar'}]
+        self.plpy_mock_execute.return_value = [{'foo': 'old_bar'}]
         with self.subject.SetGUC("foo", "new_bar") as C:
             self.assertTrue("new_bar", C.new_guc_value)
-        self.plpy_mock_execute.assert_called_with(
-            "set foo='new_bar'")
+        self.assertEqual(3, self.plpy_mock_execute.call_count)
+        self.plpy_mock_execute.assert_has_calls([call('show foo'),
+                                                 call("set foo='new_bar'"),
+                                                 call("set foo='old_bar'")])
+
 
     def test_set_guc_missing(self):
         self.plpy_mock_execute.side_effect = plpy.SPIError(
             'Unrecognized configuration parameter "foo"')
         with self.subject.SetGUC("foo", "new_bar") as C:
             self.assertFalse(C.guc_exists)
-
+        self.assertEqual(1, self.plpy_mock_execute.call_count)
+        self.plpy_mock_execute.assert_has_calls([call('show foo')])
 
 if __name__ == '__main__':
     unittest.main()