You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nk...@apache.org on 2020/03/24 20:06:32 UTC
[madlib] 01/05: Utilities: Improve rename table logic
This is an automated email from the ASF dual-hosted git repository.
nkak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 5a95e7c93b4effed6191f508624d4dc7333f9987
Author: Nikhil Kak <nk...@pivotal.io>
AuthorDate: Wed Mar 18 17:23:12 2020 -0700
Utilities: Improve rename table logic
JIRA: MADLIB-1406
Consider the example
"rename table foo to bar.foo2"
Previously rename_table would fail if the table 'foo2' already existed on the
search_path. This is because we first renamed the table foo to foo2 and then
set the schema to bar.
The new logic fixes this by renaming foo to a unique string, setting the schema
to bar and then renaming the unique string to bar
Also moved the function to utilities since the new logic required the use of
unique_string. Also it seemed better suited to utilities.py instead of
validate_args.py
Co-authored-by: Domino Valdano <dv...@pivotal.io>
Co-authored-by: Ekta Khanna <ek...@pivotal.io>
---
src/ports/postgres/modules/graph/hits.py_in | 2 +-
src/ports/postgres/modules/graph/pagerank.py_in | 2 +-
src/ports/postgres/modules/graph/wcc.py_in | 2 +-
src/ports/postgres/modules/linalg/svd.py_in | 2 +-
.../modules/regress/clustered_variance.py_in | 2 +-
.../postgres/modules/regress/robust_logistic.py_in | 2 +-
.../modules/regress/robust_mlogistic.py_in | 2 +-
.../postgres/modules/utilities/utilities.py_in | 114 +++++++++++++++++++++
.../postgres/modules/utilities/validate_args.py_in | 65 ------------
9 files changed, 121 insertions(+), 72 deletions(-)
diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index f792f15..1283070 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -47,7 +47,7 @@ from utilities.utilities import is_platform_pg
from utilities.validate_args import columns_exist_in_table, drop_tables
from utilities.validate_args import get_cols_and_types, table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
def validate_hits_args(schema_madlib, vertex_table, vertex_id, edge_table,
edge_params, out_table, max_iter, threshold,
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index 41f25de..e732675 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -49,7 +49,7 @@ from utilities.utilities import py_list_to_sql_string
from utilities.validate_args import columns_exist_in_table, get_cols_and_types
from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 4adc52e..28b06f5 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -37,7 +37,7 @@ from utilities.validate_args import columns_exist_in_table, get_expr_type
from utilities.utilities import is_platform_pg
from utilities.utilities import add_postfix
from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
from utilities.control import MinWarning
from graph_utils import validate_graph_coding, get_graph_usage
from graph_utils import validate_output_and_summary_tables
diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in
index 894fae2..bf0e522 100644
--- a/src/ports/postgres/modules/linalg/svd.py_in
+++ b/src/ports/postgres/modules/linalg/svd.py_in
@@ -11,7 +11,7 @@ from utilities.utilities import _assert
from utilities.utilities import add_postfix
from utilities.validate_args import columns_exist_in_table
from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
from matrix_ops import get_dims
from matrix_ops import validate_sparse
from matrix_ops import validate_dense
diff --git a/src/ports/postgres/modules/regress/clustered_variance.py_in b/src/ports/postgres/modules/regress/clustered_variance.py_in
index 44b042d..8931066 100644
--- a/src/ports/postgres/modules/regress/clustered_variance.py_in
+++ b/src/ports/postgres/modules/regress/clustered_variance.py_in
@@ -8,7 +8,7 @@ from utilities.utilities import unique_string
from utilities.utilities import _string_to_array_with_quotes
from utilities.utilities import extract_keyvalue_params
from utilities.validate_args import columns_exist_in_table
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
from utilities.validate_args import table_is_empty
from utilities.validate_args import table_exists
from utilities.utilities import _assert
diff --git a/src/ports/postgres/modules/regress/robust_logistic.py_in b/src/ports/postgres/modules/regress/robust_logistic.py_in
index 189123b..4bc72a4 100644
--- a/src/ports/postgres/modules/regress/robust_logistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_logistic.py_in
@@ -11,7 +11,7 @@
import plpy
from utilities.utilities import _assert
from utilities.utilities import unique_string
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
# use mad_vec to process arrays passed as strings in GPDB < 4.1 and PG < 9.0
from utilities.utilities import __mad_version
diff --git a/src/ports/postgres/modules/regress/robust_mlogistic.py_in b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
index 897b849..ffabd37 100644
--- a/src/ports/postgres/modules/regress/robust_mlogistic.py_in
+++ b/src/ports/postgres/modules/regress/robust_mlogistic.py_in
@@ -15,7 +15,7 @@ from utilities.utilities import _assert
from utilities.utilities import add_postfix
from utilities.validate_args import table_exists
-from utilities.validate_args import rename_table
+from utilities.utilities import rename_table
from utilities.validate_args import columns_exist_in_table
from regress.robust_linear import _robust_linregr_validate
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 210fbd4..73ff894 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -15,6 +15,7 @@ from validate_args import input_tbl_valid
from validate_args import is_var_valid
from validate_args import output_tbl_valid
from validate_args import quote_ident
+from validate_args import drop_tables
import plpy
@@ -1162,3 +1163,116 @@ def rotate(l, n):
"""
return l[-n:] + l[:-n]
# ------------------------------------------------------------------------------
+
+def rename_table(schema_madlib, orig_name, new_name):
+ """
+ Renames possibly schema qualified table name to a new schema qualified name
+ ensuring the schema qualification are changed appropriately
+
+ Args:
+ @param orig_name: string, Original name of the table
+ (must be schema qualified if table schema is not in search path)
+ @param new_name: string, New name of the table
+ (can be schema qualified. If it is not then the original
+ schema is maintained)
+ Returns:
+ String. The new table name qualified with the schema name
+ """
+ new_names_split = new_name.split(".")
+ if len(new_names_split) > 2:
+ raise AssertionError("Invalid table name")
+ new_table_name = new_names_split[-1]
+ new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
+
+ orig_names_split = orig_name.split(".")
+ if len(orig_names_split) > 2:
+ raise AssertionError("Invalid table name")
+
+ if len(orig_names_split) > 1:
+ orig_table_schema = orig_names_split[0]
+ else:
+ # we need to get the schema name of the original table if we are
+ # to change the schema of the new table. This is to ensure that we
+ # change the schema of the correct table in case there are multiple
+ # tables with the same new name.
+ orig_table_schema = get_first_schema(orig_name)
+
+ if orig_table_schema is None:
+ raise AssertionError("Relation {0} not found during rename".
+ format(orig_name))
+ return __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+ new_table_schema, new_table_name)
+
+def __do_rename_and_get_new_name(orig_name, new_name, orig_table_schema,
+ new_table_schema, new_table_name):
+ """
+ Internal private function to perform the rename operation after all the
+ validation checks
+ """
+
+ """
+ CASE 1
+ If the output table is schema is pg_temp, we cannot alter table schemas from/to
+ temp schemas. If it looks like a temp schema, we stay safe and just use
+ create/drop
+ Test cases
+ foo.bar to pg_temp.bar
+ foo.bar to pg_temp.bar2
+ foo to pg_temp.bar
+ pg_temp.foo to pg_temp.bar
+ """
+ if new_table_schema and 'pg_temp' in new_table_schema:
+ """
+ If both new_table_schema and orig_table_schema have pg_temp in it,
+ just run an alter statement instead of CTAS. Without this, pca dev-check
+ fails on gpdb5/6 (but not on pg)
+ """
+ if new_table_schema != orig_table_schema:
+ plpy.info("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+ .format(**locals()))
+ plpy.execute("""CREATE TABLE {new_name} AS SELECT * FROM {orig_name}"""
+ .format(**locals()))
+ drop_tables([orig_name])
+ return new_name
+ else:
+ plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+ format(**locals()))
+ return new_name
+
+ """
+ CASE 2
+ Do direct rename if the new table does not have an output schema or
+ if the new table schema is the same as the original table schema
+ Test Cases
+ rename foo to bar
+ rename foo.bar to foo.bar2
+ rename foo.bar to bar2
+ """
+ if not new_table_schema or new_table_schema == orig_table_schema:
+ plpy.execute("ALTER TABLE {orig_name} RENAME TO {new_table_name}".
+ format(**locals()))
+ return orig_table_schema + "." + new_table_name
+
+ """
+ CASE 3
+ output table is schema qualified
+ 1. rename the original table to an interim temp name
+ 2. set the new schema on that interim table
+ 3. rename interim table to the new table name
+ Test cases
+ foo.bar to foo2.bar2
+ foo.bar to foo2.bar
+ """
+ interim_temp_name = unique_string("rename_table_helper")
+ plpy.execute(
+ "ALTER TABLE {orig_name} RENAME to {interim_temp_name}".format(
+ **locals()))
+
+ plpy.execute(
+ """ALTER TABLE {interim_temp_name} SET SCHEMA {new_table_schema}""".format(
+ **locals()))
+
+ plpy.execute(
+ """ALTER TABLE {new_table_schema}.{interim_temp_name} RENAME to {new_table_name}"""
+ .format(**locals()))
+ return new_name
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index ac6bd4a..a21be58 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -164,71 +164,6 @@ def table_exists(tbl, only_first_schema=False):
# -------------------------------------------------------------------------
-def rename_table(schema_madlib, orig_name, new_name):
- """
- Renames possibly schema qualified table name to a new schema qualified name
- ensuring the schema qualification are changed appropriately
-
- Args:
- @param orig_name: string, Original name of the table
- (must be schema qualified if table schema is not in search path)
- @param new_name: string, New name of the table
- (can be schema qualified. If it is not then the original
- schema is maintained)
- Returns:
- String. The new table name qualified with the schema name
- """
- new_names_split = new_name.split(".")
- if len(new_names_split) > 2:
- raise AssertionError("Invalid table name")
- new_table_name = new_names_split[-1]
- new_table_schema = new_names_split[0] if len(new_names_split) > 1 else None
-
- orig_names_split = orig_name.split(".")
- if len(orig_names_split) > 2:
- raise AssertionError("Invalid table name")
-
- if len(orig_names_split) > 1:
- orig_table_schema = orig_names_split[0]
- else:
- # we need to get the schema name of the original table if we are
- # to change the schema of the new table. This is to ensure that we
- # change the schema of the correct table in case there are multiple
- # tables with the same new name.
- orig_table_schema = get_first_schema(orig_name)
-
- if orig_table_schema is None:
- raise AssertionError("Relation {0} not found during rename".
- format(orig_name))
-
- plpy.execute("ALTER TABLE {orig_table} RENAME TO {new_table}".
- format(orig_table=orig_name, new_table=new_table_name))
-
- if new_table_schema:
- if new_table_schema != orig_table_schema:
- # set schema only if a change in schema is required
- before_schema_string = "{0}.{1}".format(orig_table_schema,
- new_table_name)
-
- if 'pg_temp' in new_table_schema:
- # We cannot alter table schemas from/to temp schemas
- # If it looks like a temp schema, we stay safe and just use
- # create/drop
- plpy.execute("""CREATE TABLE {new_name} AS
- SELECT * FROM {new_table_name}""".
- format(**locals()))
- drop_tables([new_table_name])
- else:
- plpy.execute("""ALTER TABLE {new_table}
- SET SCHEMA {schema_name}""".
- format(new_table=before_schema_string,
- schema_name=new_table_schema))
- return new_name
- else:
- return orig_table_schema + "." + new_table_name
-# -------------------------------------------------------------------------
-
-
def get_first_schema(table_name):
"""
Return first schema name from search path that contains given table.