You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2018/07/13 08:24:39 UTC

madlib git commit: Utils: Add a Python quote_literal for GP platforms

Repository: madlib
Updated Branches:
  refs/heads/master e64dba4eb -> 5e47c8e4c


Utils: Add a Python quote_literal for GP platforms

Versions prior to GPDB 6 or PostgreSQL 9.1 do not provide
plpy.quote_literal which is necessary for building a SQL text array from
a Python list of strings.  We work around this limitation by creating
our own quote_literal function that just returns plpy.quote_literal
output for platforms that provide the function. For other platforms, we
compromise by using dollar-quoting (with an obscure tag between the
dollars).


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/5e47c8e4
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/5e47c8e4
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/5e47c8e4

Branch: refs/heads/master
Commit: 5e47c8e4cce205c5ecfda5e2e1d6bdc0a7330603
Parents: e64dba4
Author: Rahul Iyer <ri...@apache.org>
Authored: Thu Jul 12 22:46:07 2018 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Fri Jul 13 00:40:41 2018 -0700

----------------------------------------------------------------------
 src/ports/greenplum/cmake/GreenplumUtils.cmake  |  3 +-
 src/ports/postgres/cmake/PostgreSQLUtils.cmake  |  4 +
 src/ports/postgres/modules/convex/mlp_igd.py_in |  4 +-
 .../postgres/modules/internal/db_utils.py_in    | 77 +++++++++++---------
 4 files changed, 50 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/greenplum/cmake/GreenplumUtils.cmake
----------------------------------------------------------------------
diff --git a/src/ports/greenplum/cmake/GreenplumUtils.cmake b/src/ports/greenplum/cmake/GreenplumUtils.cmake
index 0fc1637..5ec271e 100644
--- a/src/ports/greenplum/cmake/GreenplumUtils.cmake
+++ b/src/ports/greenplum/cmake/GreenplumUtils.cmake
@@ -9,8 +9,9 @@ function(define_greenplum_features IN_VERSION OUT_FEATURES)
         list(APPEND ${OUT_FEATURES} __HAS_FUNCTION_PROPERTIES__)
     endif()
 
-    if(${IN_VERSION} VERSION_GREATER "4.3")
+    if(NOT ${IN_VERSION} VERSION_LESS "6.0")
         list(APPEND ${OUT_FEATURES} __HAS_BOOL_TO_TEXT_CAST__)
+        list(APPEND ${OUT_FEATURES} __HAS_PLPY_QUOTE_FUNCTIONS__)
     endif()
 
     # Pass values to caller

http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/cmake/PostgreSQLUtils.cmake
----------------------------------------------------------------------
diff --git a/src/ports/postgres/cmake/PostgreSQLUtils.cmake b/src/ports/postgres/cmake/PostgreSQLUtils.cmake
index 0139015..e08effe 100644
--- a/src/ports/postgres/cmake/PostgreSQLUtils.cmake
+++ b/src/ports/postgres/cmake/PostgreSQLUtils.cmake
@@ -6,6 +6,10 @@ function(define_postgresql_features IN_VERSION OUT_FEATURES)
         list(APPEND ${OUT_FEATURES} __HAS_BOOL_TO_TEXT_CAST__)
     endif()
 
+    if(NOT ${IN_VERSION} VERSION_LESS "9.1")
+        list(APPEND ${OUT_FEATURES} __HAS_PLPY_QUOTE_FUNCTIONS__)
+    endif()
+
     # Pass values to caller
     set(${OUT_FEATURES} "${${OUT_FEATURES}}" PARENT_SCOPE)
 endfunction(define_postgresql_features)

http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 3ab7f45..7df44ec 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -33,7 +33,7 @@ from convex.utils_regularization import __utils_normalize_data_grouping
 
 from internal.db_utils import get_distinct_col_levels
 from internal.db_utils import get_one_hot_encoded_expr
-from internal.db_utils import quote_literal_python_list
+from internal.db_utils import quote_literal
 from utilities.control import MinWarning
 from utilities.in_mem_group_control import GroupIterationController
 from utilities.utilities import _array_to_string
@@ -145,7 +145,7 @@ def mlp(schema_madlib, source_table, output_table, independent_varname,
                                             dim=2)
         if is_classification:
             if pp_summary_dict["class_values"]:
-                classes = quote_literal_python_list(pp_summary_dict["class_values"])
+                classes = [quote_literal(c) for c in pp_summary_dict["class_values"]]
                 num_output_nodes = len(classes)
             else:
                 # Assume that the dependent variable is already one-hot-encoded

http://git-wip-us.apache.org/repos/asf/madlib/blob/5e47c8e4/src/ports/postgres/modules/internal/db_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/internal/db_utils.py_in b/src/ports/postgres/modules/internal/db_utils.py_in
index e82ba91..4c41515 100644
--- a/src/ports/postgres/modules/internal/db_utils.py_in
+++ b/src/ports/postgres/modules/internal/db_utils.py_in
@@ -24,51 +24,58 @@ from utilities.validate_args import get_expr_type
 m4_changequote(`<!', `!>')
 
 QUOTE_DELIMITER="$__madlib__$"
+HAS_PLPY_QUOTE_FUNCTIONS = m4_ifdef(<!__HAS_PLPY_QUOTE_FUNCTIONS__!>,
+                                    <!True!>, <!False!>);
+
 
 def get_distinct_col_levels(source_table, col_name, col_type=None):
-	"""
-	Add description here
-	:return:
-	"""
-	if not col_type:
-		col_type = get_expr_type(col_name, source_table)
+    """
+    Add description here
+    :return:
+    """
+    if not col_type:
+        col_type = get_expr_type(col_name, source_table)
 
-	if is_psql_char_type(col_type):
-		dep_var_text_patched = "quote_literal({0})".format(col_name)
-	else:
-		dep_var_text_patched = col_name
+    if is_psql_char_type(col_type):
+        dep_var_text_patched = "quote_literal({0})".format(col_name)
+    else:
+        dep_var_text_patched = col_name
 
-	levels = plpy.execute("""
+    levels = plpy.execute("""
                 SELECT DISTINCT {dep_var_text_patched} AS levels
                 FROM {source_table}
                 WHERE ({col_name}) is NOT NULL
                 """.format(**locals()))
 
-	levels = sorted(l["levels"] for l in levels)
-	return levels
+    levels = sorted(l["levels"] for l in levels)
+    return levels
+
 
 def get_one_hot_encoded_expr(col_name, col_levels):
-	"""
-	All the values in col_levels should have been quoted and escaped with
-	the sql function `quote_literal`.
-	:param col_name:
-	:param col_levels:
-	:return:
-	"""
-	one_hot_encoded_expr = ["({0}) = {1}".format(col_name, c)
-					for c in col_levels]
-	return 'ARRAY[{0}]::INTEGER[]'.format(', '.join(one_hot_encoded_expr))
+    """
+    All the values in col_levels should have been quoted and escaped with
+    the sql function `quote_literal`.
+    :param col_name:
+    :param col_levels:
+    :return:
+    """
+    one_hot_encoded_expr = ["({0}) = {1}".format(col_name, c)
+                            for c in col_levels]
+    return 'ARRAY[{0}]::INTEGER[]'.format(', '.join(one_hot_encoded_expr))
+# ------------------------------------------------------------------------------
+
 
-def quote_literal_python_list(values):
-	"""
-	This function will sql quote all the values inside a python list
-	:param values:
-	:return:
-	"""
-	quoted_values=[]
-	for value in values:
-		quoted_values.append(plpy.execute(""
-		"select quote_literal({0}{1}{0}) as quoted_value".
-		format(QUOTE_DELIMITER, value))[0]['quoted_value'])
+def quote_literal(input_str):
+    """ Return the given string suitably quoted to be used as a string literal
+        in an SQL statement string.
 
-	return quoted_values
+        The plpy.quote_literal is not available in GPDB 4.3 - this function is
+        provided as a proxy for that platform. For all other platforms this
+        function, forwards the argument to plpy.quote_literal.
+    """
+    if HAS_PLPY_QUOTE_FUNCTIONS:
+        return plpy.quote_literal(input_str)
+    else:
+        return "{qd}{input_str}{qd}".format(qd=QUOTE_DELIMITER,
+                                            input_str=input_str)
+# ------------------------------------------------------------------------------