You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2021/03/05 13:29:04 UTC

[madlib] branch master updated (7eeb29c -> 14a91ce)

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git.


    from 7eeb29c  clarify example in user docs for loading model arch
     new 4b87a71  DL: Check if the owner of the object table is a superuser
     new 14a91ce  update user docs with security warnings

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../madlib_keras_custom_function.py_in             |  8 ++--
 .../madlib_keras_custom_function.sql_in            | 54 ++++++++++++++++------
 .../deep_learning/madlib_keras_validator.py_in     |  8 ++++
 .../postgres/modules/utilities/utilities.py_in     | 11 +++++
 4 files changed, 61 insertions(+), 20 deletions(-)


[madlib] 01/02: DL: Check if the owner of the object table is a superuser

Posted by ok...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 4b87a71ba1f8b6036f172fbda573a5626f1c8482
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Thu Feb 25 20:30:43 2021 +0300

    DL: Check if the owner of the object table is a superuser
---
 .../modules/deep_learning/madlib_keras_custom_function.py_in  |  8 +++-----
 .../modules/deep_learning/madlib_keras_validator.py_in        |  8 ++++++++
 src/ports/postgres/modules/utilities/utilities.py_in          | 11 +++++++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
index 1ebf9f6..32a5757 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
@@ -128,13 +128,11 @@ def load_custom_function(schema_madlib, object_table, object, name, description=
 def delete_custom_function(schema_madlib, object_table, id=None, name=None, **kwargs):
 
     if object_table is not None:
-        schema_name = get_schema(object_table)
-        if schema_name is None:
-            object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
-        elif schema_name != schema_madlib:
-            plpy.error("DL: Custom function table has to be in the {0} schema".format(schema_madlib))
+        object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
 
     input_tbl_valid(object_table, "Keras Custom Funtion")
+    _assert(is_superuser(current_user()), "DL: The user has to have admin "\
+        "privilages to delete a custom function")
     _assert(id is not None or name is not None,
             "{0}: function id/name cannot be NULL! " \
             "Use \"SELECT delete_custom_function('usage')\" for help.".format(module_name))
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index 535d70d..ab8d336 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -40,6 +40,8 @@ from utilities.utilities import _assert
 from utilities.utilities import add_postfix
 from utilities.utilities import is_platform_pg
 from utilities.utilities import is_var_valid
+from utilities.utilities import is_superuser
+from utilities.utilities import get_table_owner
 from utilities.validate_args import cols_in_tbl_valid
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import get_expr_type
@@ -324,6 +326,9 @@ class FitCommonValidator(object):
 
         if self.object_table is not None:
             input_tbl_valid(self.object_table, self.module_name)
+
+            _assert(is_superuser(get_table_owner(self.object_table)),
+                "DL: Cannot use a table of a non-superuser as object table.")
             cols_in_tbl_valid(self.object_table, CustomFunctionSchema.col_names, self.module_name)
 
         if self.warm_start:
@@ -543,6 +548,7 @@ class MstLoaderInputValidator():
         # Default metrics, since it is not part of the builtin metrics list
         builtin_metrics.append('accuracy')
         if self.object_table is not None:
+
             res = plpy.execute("SELECT {0} from {1}".format(CustomFunctionSchema.FN_NAME,
                                                             self.object_table))
             for r in res:
@@ -576,6 +582,8 @@ class MstLoaderInputValidator():
         input_tbl_valid(self.model_arch_table, self.module_name)
         if self.object_table is not None:
             input_tbl_valid(self.object_table, self.module_name)
+            _assert(is_superuser(get_table_owner(self.object_table)),
+                "DL: Cannot use a table of a non-superuser as object table.")
         if self.module_name == 'load_model_selection_table' or self.module_name == 'madlib_keras_automl':
             output_tbl_valid(self.model_selection_table, self.module_name)
             output_tbl_valid(self.model_selection_summary_table, self.module_name)
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index e5a4c3d..8fc4a28 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -775,6 +775,17 @@ def is_superuser(user):
     return plpy.execute("SELECT rolsuper FROM pg_catalog.pg_roles "\
                         "WHERE rolname = '{0}'".format(user))[0]['rolsuper']
 
+def get_table_owner(schema_table):
+
+    split_table = schema_table.split(".",1)
+    schema = split_table[0]
+    non_schema_table = split_table[1]
+
+    q = """SELECT tableowner FROM pg_catalog.pg_tables
+           WHERE schemaname='{0}' AND tablename='{1}'
+           """.format(schema, non_schema_table)
+    return plpy.execute(q)[0]['tableowner']
+
 def madlib_version(schema_madlib):
     """Returns the MADlib version string."""
     raw = plpy.execute("""


[madlib] 02/02: update user docs with security warnings

Posted by ok...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 14a91cef3b89489be6d8110c8364a6c0662516c4
Author: Frank McQuillan <fm...@pivotal.io>
AuthorDate: Thu Mar 4 15:01:56 2021 -0800

    update user docs with security warnings
---
 .../madlib_keras_custom_function.sql_in            | 54 ++++++++++++++++------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in
index 3046891..2bf3c56 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in
@@ -41,6 +41,15 @@ m4_include(`SQLCommon.m4')
 <li class="level1"><a href="#related">Related Topics</a></li>
 </ul></div>
 
+\warning <em> 
+For security reasons there are controls on custom functions in MADlib.
+You must be a superuser to create custom functions because they
+could theoretically allow execution of any untrusted Python code.
+Regular users with MADlib USAGE permission can use existing custom 
+functions but cannot create new ones or update existing ones.
+See references [1] and [2] for information 
+on privileges in Greenplum and PostgreSQL. </em>
+
 This function loads custom Python functions
 into a table for use by deep learning algorithms.
 
@@ -48,9 +57,9 @@ Custom functions can be useful if, for example, you need loss functions
 or metrics that are not built into the standard libraries.
 The functions to be loaded must be in the form of serialized Python objects
 created using Dill, which extends Python's pickle module to the majority
-of the built-in Python types [1].
+of the built-in Python types [3].
 
-Custom functions are also used to return top k categorical accuracy rate
+Custom functions can also be used to return top k categorical accuracy
 in the case that you want a different k value than the default from Keras.
 This module includes a helper function to create the custom function
 automatically for a specified k.
@@ -58,12 +67,18 @@ automatically for a specified k.
 There is also a utility function to delete a function
 from the table.
 
+@note
+Do not specify a schema for the argument 'object_table' containing the Python objects, 
+because the 'object_table' is automatically put in the MADlib schema.
+Also, any subsequent SQL queries on this table by regular users must 
+specify '<madlib_schema>.object_table' in the usual way.
+
 @anchor load_function
 @par Load Function
 
 <pre class="syntax">
 load_custom_function(
-    object table,
+    object_table,
     object,
     name,
     description
@@ -71,10 +86,12 @@ load_custom_function(
 </pre>
 \b Arguments
 <dl class="arglist">
-  <dt>object table</dt>
+  <dt>object_table</dt>
   <dd>VARCHAR. Table to load serialized Python objects.  If this table
   does not exist, it will be created.  If this table already
   exists, a new row is inserted into the existing table.
+  Do not specify schema as part of the object table name, since
+  it will be put in the MADlib schema automatically.
   </dd>
 
   <dt>object</dt>
@@ -84,7 +101,7 @@ load_custom_function(
 
   @note
   The Dill package must be installed on all segments of the
-  database cluster [1].
+  database cluster [3].
   </dd>
 
   <dt>name</dt>
@@ -148,6 +165,7 @@ delete_custom_function(
 <dl class="arglist">
   <dt>object_table</dt>
     <dd>VARCHAR. Table containing Python object to be deleted.
+    Do not specify schema as part of the object table name.
   </dd>
   <dt>id</dt>
     <dd>INTEGER. The id of the object to be deleted.
@@ -161,22 +179,24 @@ delete_custom_function(
 @par Top k Accuracy Function
 
 Create and load a custom function for a specific k into the custom functions table.
-The Keras accuracy parameter 'top_k_categorical_accuracy' returns top 5 accuracy by default [2].
+The Keras accuracy parameter 'top_k_categorical_accuracy' returns top 5 accuracy by default [4].
 If you want a different top k value, use this helper function to create a custom
 Python function to compute the top k accuracy that you specify.
 
 <pre class="syntax">
 load_top_k_accuracy_function(
-    object table,
+    object_table,
     k
     )
 </pre>
 \b Arguments
 <dl class="arglist">
-  <dt>object table</dt>
+  <dt>object_table</dt>
   <dd>VARCHAR. Table to load serialized Python objects.  If this table
   does not exist, it will be created.  If this table already
   exists, a new row is inserted into the existing table.
+  Do not specify schema as part of the object table name, since
+  it will be put in the MADlib schema automatically.
   </dd>
 
   <dt>k</dt>
@@ -236,14 +256,14 @@ def rmse(y_true, y_pred):
     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
 pb_rmse=dill.dumps(rmse)
 \# call load function
-cur.execute("DROP TABLE IF EXISTS custom_function_table")
+cur.execute("DROP TABLE IF EXISTS madlib.custom_function_table")
 cur.execute("SELECT madlib.load_custom_function('custom_function_table',  %s,'squared_error', 'squared error')", [p2.Binary(pb_squared_error)])
 cur.execute("SELECT madlib.load_custom_function('custom_function_table',  %s,'rmse', 'root mean square error')", [p2.Binary(pb_rmse)])
 conn.commit()
 </pre>
 List table to see objects:
 <pre class="example">
-SELECT id, name, description FROM custom_function_table ORDER BY id;
+SELECT id, name, description FROM madlib.custom_function_table ORDER BY id;
 </pre>
 <pre class="result">
  id |     name      |      description
@@ -276,7 +296,7 @@ $$ language plpythonu;
 </pre>
 Now call loader:
 <pre class="result">
-DROP TABLE IF EXISTS custom_function_table;
+DROP TABLE IF EXISTS madlib.custom_function_table;
 SELECT madlib.load_custom_function('custom_function_table',
                                    custom_function_squared_error(),
                                    'squared_error',
@@ -289,7 +309,7 @@ SELECT madlib.load_custom_function('custom_function_table',
 -# Delete an object by id:
 <pre class="example">
 SELECT madlib.delete_custom_function( 'custom_function_table', 1);
-SELECT id, name, description FROM custom_function_table ORDER BY id;
+SELECT id, name, description FROM madlib.custom_function_table ORDER BY id;
 </pre>
 <pre class="result">
  id | name |      description
@@ -309,7 +329,7 @@ SELECT madlib.load_top_k_accuracy_function('custom_function_table',
                                            3);
 SELECT madlib.load_top_k_accuracy_function('custom_function_table',
                                            10);
-SELECT id, name, description FROM custom_function_table ORDER BY id;
+SELECT id, name, description FROM madlib.custom_function_table ORDER BY id;
 </pre>
 <pre class="result">
  id |      name       |       description
@@ -320,9 +340,13 @@ SELECT id, name, description FROM custom_function_table ORDER BY id;
 @anchor literature
 @literature
 
-[1] Python catalog for Dill package https://pypi.org/project/dill/
+[1] https://gpdb.docs.pivotal.io/latest/admin_guide/roles_privs.html
+
+[2] https://www.postgresql.org/docs/current/ddl-priv.html
+
+[3] Python catalog for Dill package https://pypi.org/project/dill/
 
-[2] https://keras.io/api/metrics/accuracy_metrics/#topkcategoricalaccuracy-class
+[4] https://keras.io/api/metrics/accuracy_metrics/#topkcategoricalaccuracy-class
 
 @anchor related
 @par Related Topics