You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2018/07/13 08:24:39 UTC
madlib git commit: Utils: Add a Python quote_literal for GP platforms
Repository: madlib
Updated Branches:
refs/heads/master e64dba4eb -> 5e47c8e4c
Utils: Add a Python quote_literal for GP platforms
Versions prior to GPDB 6 or PostgreSQL 9.1 do not provide
plpy.quote_literal which is necessary for building a SQL text array from
a Python list of strings. We work around this limitation by creating
our own quote_literal function that just returns plpy.quote_literal
output for platforms that provide the function. For other platforms, we
compromise by using dollar-quoting (with an obscure tag between the
dollars).
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/5e47c8e4
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/5e47c8e4
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/5e47c8e4
Branch: refs/heads/master
Commit: 5e47c8e4cce205c5ecfda5e2e1d6bdc0a7330603
Parents: e64dba4
Author: Rahul Iyer <ri...@apache.org>
Authored: Thu Jul 12 22:46:07 2018 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Fri Jul 13 00:40:41 2018 -0700
----------------------------------------------------------------------
src/ports/greenplum/cmake/GreenplumUtils.cmake | 3 +-
src/ports/postgres/cmake/PostgreSQLUtils.cmake | 4 +
src/ports/postgres/modules/convex/mlp_igd.py_in | 4 +-
.../postgres/modules/internal/db_utils.py_in | 77 +++++++++++---------
4 files changed, 50 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/greenplum/cmake/GreenplumUtils.cmake
----------------------------------------------------------------------
diff --git a/src/ports/greenplum/cmake/GreenplumUtils.cmake b/src/ports/greenplum/cmake/GreenplumUtils.cmake
index 0fc1637..5ec271e 100644
--- a/src/ports/greenplum/cmake/GreenplumUtils.cmake
+++ b/src/ports/greenplum/cmake/GreenplumUtils.cmake
@@ -9,8 +9,9 @@ function(define_greenplum_features IN_VERSION OUT_FEATURES)
list(APPEND ${OUT_FEATURES} __HAS_FUNCTION_PROPERTIES__)
endif()
- if(${IN_VERSION} VERSION_GREATER "4.3")
+ if(NOT ${IN_VERSION} VERSION_LESS "6.0")
list(APPEND ${OUT_FEATURES} __HAS_BOOL_TO_TEXT_CAST__)
+ list(APPEND ${OUT_FEATURES} __HAS_PLPY_QUOTE_FUNCTIONS__)
endif()
# Pass values to caller
http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/cmake/PostgreSQLUtils.cmake
----------------------------------------------------------------------
diff --git a/src/ports/postgres/cmake/PostgreSQLUtils.cmake b/src/ports/postgres/cmake/PostgreSQLUtils.cmake
index 0139015..e08effe 100644
--- a/src/ports/postgres/cmake/PostgreSQLUtils.cmake
+++ b/src/ports/postgres/cmake/PostgreSQLUtils.cmake
@@ -6,6 +6,10 @@ function(define_postgresql_features IN_VERSION OUT_FEATURES)
list(APPEND ${OUT_FEATURES} __HAS_BOOL_TO_TEXT_CAST__)
endif()
+ if(NOT ${IN_VERSION} VERSION_LESS "9.1")
+ list(APPEND ${OUT_FEATURES} __HAS_PLPY_QUOTE_FUNCTIONS__)
+ endif()
+
# Pass values to caller
set(${OUT_FEATURES} "${${OUT_FEATURES}}" PARENT_SCOPE)
endfunction(define_postgresql_features)
http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 3ab7f45..7df44ec 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -33,7 +33,7 @@ from convex.utils_regularization import __utils_normalize_data_grouping
from internal.db_utils import get_distinct_col_levels
from internal.db_utils import get_one_hot_encoded_expr
-from internal.db_utils import quote_literal_python_list
+from internal.db_utils import quote_literal
from utilities.control import MinWarning
from utilities.in_mem_group_control import GroupIterationController
from utilities.utilities import _array_to_string
@@ -145,7 +145,7 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
dim=2)
if is_classification:
if pp_summary_dict["class_values"]:
- classes = quote_literal_python_list(pp_summary_dict["class_values"])
+ classes = [quote_literal(c) for c in pp_summary_dict["class_values"]]
num_output_nodes = len(classes)
else:
# Assume that the dependent variable is already one-hot-encoded
http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/modules/internal/db_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/internal/db_utils.py_in b/src/ports/postgres/modules/internal/db_utils.py_in
index e82ba91..4c41515 100644
--- a/src/ports/postgres/modules/internal/db_utils.py_in
+++ b/src/ports/postgres/modules/internal/db_utils.py_in
@@ -24,51 +24,58 @@ from utilities.validate_args import get_expr_type
m4_changequote(`<!', `!>')
QUOTE_DELIMITER="$__madlib__$"
+HAS_PLPY_QUOTE_FUNCTIONS = m4_ifdef(<!__HAS_PLPY_QUOTE_FUNCTIONS__!>,
+ <!True!>, <!False!>);
+
def get_distinct_col_levels(source_table, col_name, col_type=None):
- """
- Add description here
- :return:
- """
- if not col_type:
- col_type = get_expr_type(col_name, source_table)
+ """
+ Add description here
+ :return:
+ """
+ if not col_type:
+ col_type = get_expr_type(col_name, source_table)
- if is_psql_char_type(col_type):
- dep_var_text_patched = "quote_literal({0})".format(col_name)
- else:
- dep_var_text_patched = col_name
+ if is_psql_char_type(col_type):
+ dep_var_text_patched = "quote_literal({0})".format(col_name)
+ else:
+ dep_var_text_patched = col_name
- levels = plpy.execute("""
+ levels = plpy.execute("""
SELECT DISTINCT {dep_var_text_patched} AS levels
FROM {source_table}
WHERE ({col_name}) is NOT NULL
""".format(**locals()))
- levels = sorted(l["levels"] for l in levels)
- return levels
+ levels = sorted(l["levels"] for l in levels)
+ return levels
+
def get_one_hot_encoded_expr(col_name, col_levels):
- """
- All the values in col_levels should have been quoted and escaped with
- the sql function `quote_literal`.
- :param col_name:
- :param col_levels:
- :return:
- """
- one_hot_encoded_expr = ["({0}) = {1}".format(col_name, c)
- for c in col_levels]
- return 'ARRAY[{0}]::INTEGER[]'.format(', '.join(one_hot_encoded_expr))
+ """
+ All the values in col_levels should have been quoted and escaped with
+ the sql function `quote_literal`.
+ :param col_name:
+ :param col_levels:
+ :return:
+ """
+ one_hot_encoded_expr = ["({0}) = {1}".format(col_name, c)
+ for c in col_levels]
+ return 'ARRAY[{0}]::INTEGER[]'.format(', '.join(one_hot_encoded_expr))
+# ------------------------------------------------------------------------------
+
-def quote_literal_python_list(values):
- """
- This function will sql quote all the values inside a python list
- :param values:
- :return:
- """
- quoted_values=[]
- for value in values:
- quoted_values.append(plpy.execute(""
- "select quote_literal({0}{1}{0}) as quoted_value".
- format(QUOTE_DELIMITER, value))[0]['quoted_value'])
+def quote_literal(input_str):
+ """ Return the given string suitably quoted to be used as a string literal
+ in an SQL statement string.
- return quoted_values
+ The plpy.quote_literal is not available in GPDB 4.3 - this function is
+ provided as a proxy for that platform. For all other platforms this
+ function, forwards the argument to plpy.quote_literal.
+ """
+ if HAS_PLPY_QUOTE_FUNCTIONS:
+ return plpy.quote_literal(input_str)
+ else:
+ return "{qd}{input_str}{qd}".format(qd=QUOTE_DELIMITER,
+ input_str=input_str)
+# ------------------------------------------------------------------------------