You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nk...@apache.org on 2020/03/24 20:06:32 UTC

[madlib] 01/05: Utilities: Improve rename table logic

This is an automated email from the ASF dual-hosted git repository.

nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 5a95e7c93b4effed6191f508624d4dc7333f9987
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Mar 18 17:23:12 2020 -0700

    Utilities: Improve rename table logic
    
    JIRA: MADLIB-1406
    
    Consider the example
    "rename table foo to bar.foo2"
    
    Previously rename_table would fail if the table 'foo2' already existed on the
    search_path. This is because we first renamed the table foo to foo2 and then
    set the schema to bar.
    
    The new logic fixes this by renaming foo to a unique string, setting the schema
    to bar and then renaming the unique string to bar
    
    Also moved the function to utilities since the new logic required the use of
    unique_string. Also it seemed better suited to utilities.py instead of
    validate_args.py
    
    Co-authored-by: Domino Valdano <dv...@pivotal.io>
    Co-authored-by: Ekta Khanna <ek...@pivotal.io>
---
 src/ports/postgres/modules/graph/hits.py_in        |   2 +-
 src/ports/postgres/modules/graph/pagerank.py_in    |   2 +-
 src/ports/postgres/modules/graph/wcc.py_in         |   2 +-
 src/ports/postgres/modules/linalg/svd.py_in        |   2 +-
 .../modules/regress/clustered_variance.py_in       |   2 +-
 .../postgres/modules/regress/robust_logistic.py_in |   2 +-
 .../modules/regress/robust_mlogistic.py_in         |   2 +-
 .../postgres/modules/utilities/utilities.py_in     | 114 +++++++++++++++++++++
 .../postgres/modules/utilities/validate_args.py_in |  65 ------------
 9 files changed, 121 insertions(+), 72 deletions(-)

diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index f792f15..1283070 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -47,7 +47,7 @@ from utilities.utilities import is_platform_pg
 
 from utilities.validate_args import columns_exist_in_table, drop_tables
 from utilities.validate_args import get_cols_and_types, table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table,
                        edge_params, out_table, max_iter, threshold,
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index 41f25de..e732675 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -49,7 +49,7 @@ from utilities.utilities import py_list_to_sql_string
 
 from utilities.validate_args import columns_exist_in_table, get_cols_and_types
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 
 def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 4adc52e..28b06f5 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -37,7 +37,7 @@ from utilities.validate_args import columns_exist_in_table, get_expr_type
 from utilities.utilities import is_platform_pg
 from utilities.utilities import add_postfix
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.control import MinWarning
 from graph_utils import validate_graph_coding, get_graph_usage
 from graph_utils import validate_output_and_summary_tables
diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in
index 894fae2..bf0e522 100644
--- a/src/ports/postgres/modules/linalg/svd.py_in
+++ b/src/ports/postgres/modules/linalg/svd.py_in
@@ -11,7 +11,7 @@ from utilities.utilities import _assert
 from utilities.utilities import add_postfix
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from matrix_ops import get_dims
 from matrix_ops import validate_sparse
 from matrix_ops import validate_dense
diff --git a/src/ports/postgres/modules/regress/clustered_variance.py_in b/src/ports/postgres/modules/regress/clustered_variance.py_in
index 44b042d..8931066 100644
--- a/src/ports/postgres/modules/regress/clustered_variance.py_in
+++ b/src/ports/postgres/modules/regress/clustered_variance.py_in
@@ -8,7 +8,7 @@ from utilities.utilities import unique_string
 from utilities.utilities import _string_to_array_with_quotes
 from utilities.utilities import extract_keyvalue_params
 from utilities.validate_args import columns_exist_in_table
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.validate_args import table_is_empty
 from utilities.validate_args import table_exists
 from utilities.utilities import _assert
diff --git a/src/ports/postgres/modules/regress/robust_logistic.py_in b/src/ports/postgres/modules/regress/robust_logistic.py_in
index 189123b..4bc72a4 100644
--- a/src/ports/postgres/modules/regress/robust_logistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_logistic.py_in
@@ -11,7 +11,7 @@
 import plpy
 from utilities.utilities import _assert
 from utilities.utilities import unique_string
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 
 # use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0
 from utilities.utilities import __mad_version
diff --git a/src/ports/postgres/modules/regress/robust_mlogistic.py_in b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
index 897b849..ffabd37 100644
--- a/src/ports/postgres/modules/regress/robust_mlogistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
@@ -15,7 +15,7 @@ from utilities.utilities import _assert
 from utilities.utilities import add_postfix
 
 from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
 from utilities.validate_args import columns_exist_in_table
 
 from regress.robust_linear import _robust_linregr_validate
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 210fbd4..73ff894 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -15,6 +15,7 @@ from validate_args import input_tbl_valid
 from validate_args import is_var_valid
 from validate_args import output_tbl_valid
 from validate_args import quote_ident
+from validate_args import drop_tables
 import plpy
 
 
@@ -1162,3 +1163,116 @@ def rotate(l, n):
     """
     return l[-n:] + l[:-n]
 # ------------------------------------------------------------------------------
+
+def rename_table(schema_madlib, orig_name, new_name):
+    """
+    Renames possibly schema qualified table name to a new schema qualified name
+    ensuring the schema qualification are changed appropriately
+
+    Args:
+        @param orig_name: string, Original name of the table
+                          (must be schema qualified if table schema is not in search path)
+        @param new_name: string, New name of the table
+                          (can be schema qualified. If it is not then the original
+                           schema is maintained)
+    Returns:
+        String. The new table name qualified with the schema name
+    """
+    new_names_split = new_name.split(".")
+    if len(new_names_split) > 2:
+        raise AssertionError("Invalid table name")
+    new_table_name = new_names_split[-1]
+    new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
+
+    orig_names_split = orig_name.split(".")
+    if len(orig_names_split) > 2:
+        raise AssertionError("Invalid table name")
+
+    if len(orig_names_split) > 1:
+        orig_table_schema = orig_names_split[0]
+    else:
+        # we need to get the schema name of the original table if we are
+        # to change the schema of the new table. This is to ensure that we
+        # change the schema of the correct table in case there are multiple
+        # tables with the same new name.
+        orig_table_schema = get_first_schema(orig_name)
+
+    if orig_table_schema is None:
+        raise AssertionError("Relation {0} not found during rename".
+                             format(orig_name))
+    return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+                                        new_table_schema, new_table_name)
+
+def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+                                 new_table_schema, new_table_name):
+    """
+    Internal private function to perform the rename operation after all the
+    validation checks
+    """
+
+    """
+    CASE 1
+    If the output table is schema is pg_temp, we cannot alter table schemas from/to
+    temp schemas. If it looks like a temp schema, we stay safe and just use
+    create/drop
+        Test cases
+        foo.bar to pg_temp.bar
+        foo.bar to pg_temp.bar2
+        foo to pg_temp.bar
+        pg_temp.foo to pg_temp.bar
+    """
+    if new_table_schema and 'pg_temp' in new_table_schema:
+        """
+        If both new_table_schema and orig_table_schema have pg_temp in it,
+        just run an alter statement instead of CTAS. Without this, pca dev-check
+        fails on gpdb5/6 (but not on pg)
+        """
+        if new_table_schema != orig_table_schema:
+            plpy.info("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+                      .format(**locals()))
+            plpy.execute("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+                         .format(**locals()))
+            drop_tables([orig_name])
+            return new_name
+        else:
+            plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+                         format(**locals()))
+            return new_name
+
+    """
+    CASE 2
+    Do direct rename if the new table does not have an output schema or
+    if the new table schema is the same as the original table schema
+    Test Cases
+    rename foo to bar
+    rename foo.bar to foo.bar2
+    rename foo.bar to bar2
+    """
+    if not new_table_schema or new_table_schema == orig_table_schema:
+        plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+                     format(**locals()))
+        return orig_table_schema + "." + new_table_name
+
+    """
+    CASE 3
+    output table is schema qualified
+    1. rename the original table to an interim temp name
+    2. set the new schema on that interim table
+    3. rename interim table to the new table name
+    Test cases
+    foo.bar to foo2.bar2
+    foo.bar to foo2.bar
+    """
+    interim_temp_name = unique_string("rename_table_helper")
+    plpy.execute(
+        "ALTER TABLE {orig_name} RENAME to {interim_temp_name}".format(
+            **locals()))
+
+    plpy.execute(
+        """ALTER TABLE {interim_temp_name} SET SCHEMA {new_table_schema}""".format(
+            **locals()))
+
+    plpy.execute(
+        """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}"""
+        .format(**locals()))
+    return new_name
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index ac6bd4a..a21be58 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -164,71 +164,6 @@ def table_exists(tbl, only_first_schema=False):
 # -------------------------------------------------------------------------
 
 
-def rename_table(schema_madlib, orig_name, new_name):
-    """
-    Renames possibly schema qualified table name to a new schema qualified name
-    ensuring the schema qualification are changed appropriately
-
-    Args:
-        @param orig_name: string, Original name of the table
-                          (must be schema qualified if table schema is not in search path)
-        @param new_name: string, New name of the table
-                          (can be schema qualified. If it is not then the original
-                           schema is maintained)
-    Returns:
-        String. The new table name qualified with the schema name
-    """
-    new_names_split = new_name.split(".")
-    if len(new_names_split) > 2:
-        raise AssertionError("Invalid table name")
-    new_table_name = new_names_split[-1]
-    new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
-
-    orig_names_split = orig_name.split(".")
-    if len(orig_names_split) > 2:
-        raise AssertionError("Invalid table name")
-
-    if len(orig_names_split) > 1:
-        orig_table_schema = orig_names_split[0]
-    else:
-        # we need to get the schema name of the original table if we are
-        # to change the schema of the new table. This is to ensure that we
-        # change the schema of the correct table in case there are multiple
-        # tables with the same new name.
-        orig_table_schema = get_first_schema(orig_name)
-
-    if orig_table_schema is None:
-        raise AssertionError("Relation {0} not found during rename".
-                             format(orig_name))
-
-    plpy.execute("ALTER TABLE {orig_table} RENAME TO {new_table}".
-                 format(orig_table=orig_name, new_table=new_table_name))
-
-    if new_table_schema:
-        if new_table_schema != orig_table_schema:
-            # set schema only if a change in schema is required
-            before_schema_string = "{0}.{1}".format(orig_table_schema,
-                                                    new_table_name)
-
-            if 'pg_temp' in new_table_schema:
-                # We cannot alter table schemas from/to temp schemas
-                # If it looks like a temp schema, we stay safe and just use
-                # create/drop
-                plpy.execute("""CREATE TABLE {new_name} AS
-                                SELECT * FROM {new_table_name}""".
-                             format(**locals()))
-                drop_tables([new_table_name])
-            else:
-                plpy.execute("""ALTER TABLE {new_table}
-                                SET SCHEMA {schema_name}""".
-                             format(new_table=before_schema_string,
-                                    schema_name=new_table_schema))
-        return new_name
-    else:
-        return orig_table_schema + "." + new_table_name
-# -------------------------------------------------------------------------
-
-
 def get_first_schema(table_name):
     """
     Return first schema name from search path that contains given table.